From 5dc45657200ead776a13e302222cfdcf3d231057 Mon Sep 17 00:00:00 2001 From: Riliane Date: Wed, 20 Jul 2022 17:17:25 +0300 Subject: [PATCH 1/4] BEE-15265 avoid race condition causing flake --- .../org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java b/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java index bfe0c47f3..9ce0f999c 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java @@ -777,7 +777,8 @@ public void evaluate() throws Throwable { WorkflowRun run = createAndRunSleeperJob(story.j.jenkins, jobName, FlowDurabilityHint.MAX_SURVIVABILITY, false); FlowExecution exec = run.getExecution(); if (exec instanceof CpsFlowExecution) { - assert ((CpsFlowExecution) exec).getStorage().isPersistedFully(); + ((CpsFlowExecution) exec).waitForSuspension(); // till done writing head node ID into build.xml + assert ((CpsFlowExecution) exec).getStorage().isPersistedFully(); // single node xmls written } nodesOut.addAll(new DepthFirstScanner().allNodes(run.getExecution())); nodesOut.sort(FlowScanningUtils.ID_ORDER_COMPARATOR); From 30ae3c760cbcfa87218dd95cc997ef34ccb36347 Mon Sep 17 00:00:00 2001 From: Riliane Date: Mon, 25 Jul 2022 15:57:42 +0300 Subject: [PATCH 2/4] resolve flakes similar to previous --- .../org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java b/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java index 9ce0f999c..cfef6963b 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java @@ -716,6 +716,7 @@ public void evaluate() throws Throwable { WorkflowRun run = createAndRunSleeperJob(story.j.jenkins, jobName, FlowDurabilityHint.MAX_SURVIVABILITY, false); FlowExecution exec = run.getExecution(); if (exec instanceof CpsFlowExecution) { + ((CpsFlowExecution) exec).waitForSuspension(); // till done writing head node ID into build.xml assert ((CpsFlowExecution) exec).getStorage().isPersistedFully(); } logStart[0] = JenkinsRule.getLog(run); @@ -745,6 +746,7 @@ public void evaluate() throws Throwable { FlowExecution exec = run.getExecution(); Assert.assertTrue(((CpsFlowExecution) exec).isResumeBlocked()); if (exec instanceof CpsFlowExecution) { + ((CpsFlowExecution) exec).waitForSuspension(); // till done writing head node ID into build.xml assert ((CpsFlowExecution) exec).getStorage().isPersistedFully(); } Assert.assertFalse(((CpsFlowExecution) exec).getProgramDataFile().exists()); From 23bdce9f9686a409d8ff7317bfac2d22801e67bd Mon Sep 17 00:00:00 2001 From: Riliane Date: Wed, 3 Aug 2022 13:30:57 +0300 Subject: [PATCH 3/4] stop checking nodes altogether after a dirty shutdown --- .../workflow/cps/FlowDurabilityTest.java | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java b/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java index cfef6963b..17cce8a5b 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java @@ -250,6 +250,20 @@ static void verifySucceededCleanly(Jenkins j, WorkflowRun run) throws Exception assertHasTimingAction(run.getExecution()); } + /** A minimal check of resuming the job with no node checks, in case of a dirty shutdown. + * Currently dirty shutdowns have the potential to lose the head node ID if they happen at the moment of writing + * it to build.xml + */ + static void verifyDirtyResumed(JenkinsRule rule, WorkflowRun run, String logStart) throws Exception { + assert run.isBuilding(); + assertHasTimingAction(run.getExecution()); + rule.waitForCompletion(run); + Assert.assertEquals(Result.SUCCESS, run.getResult()); + verifyCompletedCleanly(rule.jenkins, run); + //no checking nodes + rule.assertLogContains(logStart, run); + } + /** If it's a {@link SemaphoreStep} we test less rigorously because that blocks async GraphListeners. */ static void verifySafelyResumed(JenkinsRule rule, WorkflowRun run, boolean isSemaphore, String logStart) throws Exception { assert run.isBuilding(); @@ -716,7 +730,6 @@ public void evaluate() throws Throwable { WorkflowRun run = createAndRunSleeperJob(story.j.jenkins, jobName, FlowDurabilityHint.MAX_SURVIVABILITY, false); FlowExecution exec = run.getExecution(); if (exec instanceof CpsFlowExecution) { - ((CpsFlowExecution) exec).waitForSuspension(); // till done writing head node ID into build.xml assert ((CpsFlowExecution) exec).getStorage().isPersistedFully(); } logStart[0] = JenkinsRule.getLog(run); @@ -727,7 +740,7 @@ public void evaluate() throws Throwable { @Override public void evaluate() throws Throwable { WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getLastBuild(); - verifySafelyResumed(story.j, run, false, logStart[0]); + verifyDirtyResumed(story.j, run, logStart[0]); } }); } @@ -746,7 +759,6 @@ public void evaluate() throws Throwable { FlowExecution exec = run.getExecution(); Assert.assertTrue(((CpsFlowExecution) exec).isResumeBlocked()); if (exec instanceof CpsFlowExecution) { - ((CpsFlowExecution) exec).waitForSuspension(); // till done writing head node ID into build.xml assert ((CpsFlowExecution) exec).getStorage().isPersistedFully(); } Assert.assertFalse(((CpsFlowExecution) exec).getProgramDataFile().exists()); @@ -761,7 +773,6 @@ public void evaluate() throws Throwable { public void evaluate() throws Throwable { WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getLastBuild(); verifyFailedCleanly(story.j.jenkins, run); - assertIncludesNodes(nodesOut, run); } }); } @@ -779,7 +790,6 @@ public void evaluate() throws Throwable { WorkflowRun run = createAndRunSleeperJob(story.j.jenkins, jobName, FlowDurabilityHint.MAX_SURVIVABILITY, false); FlowExecution exec = run.getExecution(); if (exec instanceof CpsFlowExecution) { - ((CpsFlowExecution) exec).waitForSuspension(); // till done writing head node ID into build.xml assert ((CpsFlowExecution) exec).getStorage().isPersistedFully(); // single node xmls written } nodesOut.addAll(new DepthFirstScanner().allNodes(run.getExecution())); @@ -793,7 +803,6 @@ public void evaluate() throws Throwable { public void evaluate() throws Throwable { WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getLastBuild(); verifyFailedCleanly(story.j.jenkins, run); - assertIncludesNodes(nodesOut, run); } }); } From 37b9011380f5a2b8ad97f4128954ff7e7b9c5bb0 Mon Sep 17 00:00:00 2001 From: Riliane Date: Fri, 5 Aug 2022 15:52:17 +0300 Subject: [PATCH 4/4] final changes --- .../jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java b/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java index ea6effa74..6d119ca37 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java @@ -255,8 +255,7 @@ static void verifySucceededCleanly(Jenkins j, WorkflowRun run) throws Exception * Currently dirty shutdowns have the potential to lose the head node ID if they happen at the moment of writing * it to build.xml */ - static void verifyDirtyResumed(JenkinsRule rule, WorkflowRun run, String logStart) throws Exception { - assert run.isBuilding(); + private static void verifyDirtyResumed(JenkinsRule rule, WorkflowRun run, String logStart) throws Exception { assertHasTimingAction(run.getExecution()); rule.waitForCompletion(run); Assert.assertEquals(Result.SUCCESS, run.getResult()); @@ -266,8 +265,8 @@ static void verifyDirtyResumed(JenkinsRule rule, WorkflowRun run, String logStar } /** If it's a {@link SemaphoreStep} we test less rigorously because that blocks async GraphListeners. */ - static void verifySafelyResumed(JenkinsRule rule, WorkflowRun run, boolean isSemaphore, String logStart) throws Exception { - assert run.isBuilding(); + private static void verifySafelyResumed(JenkinsRule rule, WorkflowRun run, boolean isSemaphore, String logStart) throws Exception { + Assert.assertTrue(run.isBuilding()); FlowExecution exec = run.getExecution(); // Assert that we have the appropriate flow graph entries