diff --git a/pom.xml b/pom.xml index 8f3c81b7..634480d9 100644 --- a/pom.xml +++ b/pom.xml @@ -94,6 +94,7 @@ org.jenkins-ci.plugins.workflow workflow-api + 1200.v8005c684b_a_c6 org.jenkins-ci.plugins.workflow diff --git a/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java b/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java index d6dbcf41..fea45e67 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java @@ -81,7 +81,6 @@ import org.jenkinsci.plugins.workflow.steps.StepExecution; import org.jenkinsci.plugins.workflow.support.concurrent.Timeout; import org.jenkinsci.plugins.workflow.support.concurrent.WithThreadName; -import org.jenkinsci.plugins.workflow.support.pickles.ExecutorPickle; import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepExecution; import org.kohsuke.accmod.Restricted; import org.kohsuke.accmod.restrictions.DoNotUse; @@ -306,6 +305,9 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab returnStatus = step.returnStatus; StepContext context = getContext(); ws = context.get(FilePath.class); + if (ws == null) { + throw new AbortException("No workspace currently accessible"); + } node = FilePathUtils.getNodeName(ws); DurableTask durableTask = step.task(); if (returnStdout) { @@ -356,12 +358,13 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab LOGGER.fine(() -> "discovered that " + node + " has been removed"); removedNodeDiscovered = System.nanoTime(); return null; - } else if (System.nanoTime() - removedNodeDiscovered < TimeUnit.MILLISECONDS.toNanos(ExecutorPickle.TIMEOUT_WAITING_FOR_NODE_MILLIS)) { + } else if (System.nanoTime() - removedNodeDiscovered < TimeUnit.MILLISECONDS.toNanos(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS)) { LOGGER.fine(() -> "rediscovering that " + node + " has been removed"); return null; } else { - LOGGER.fine(() -> node + " has been removed for a while, assuming it is not coming back"); - throw new FlowInterruptedException(Result.ABORTED, new ExecutorStepExecution.RemovedNodeCause()); + LOGGER.fine(() -> "rediscovering that " + node + " has been removed and timeout has expired"); + listener().getLogger().println(node + " has been removed for " + Util.getTimeSpanString(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS) + ", assuming it is not coming back"); + throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.RemovedNodeCause()); } } removedNodeDiscovered = 0; // something else; reset diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ComputerPickle.java b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ComputerPickle.java index bdfa9219..9ee9b064 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ComputerPickle.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ComputerPickle.java @@ -31,12 +31,14 @@ import hudson.model.Computer; import jenkins.model.Jenkins; import org.jenkinsci.plugins.workflow.flow.FlowExecutionOwner; +import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepDynamicContext; /** * Reference to {@link Computer} * - * @author Kohsuke Kawaguchi + * @deprecated Normally now done via {@link ExecutorStepDynamicContext}. */ +@Deprecated public class ComputerPickle extends Pickle { private final String slave; diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java index 79bcb9a2..f24ac769 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java @@ -26,9 +26,7 @@ import com.google.common.util.concurrent.ListenableFuture; import edu.umd.cs.findbugs.annotations.NonNull; -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import hudson.Extension; -import hudson.Main; import hudson.Util; import hudson.init.InitMilestone; import hudson.model.Executor; @@ -41,7 +39,6 @@ import hudson.model.queue.SubTask; import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; @@ -49,27 +46,23 @@ import org.jenkinsci.plugins.workflow.flow.FlowExecutionOwner; import org.jenkinsci.plugins.workflow.pickles.Pickle; import org.jenkinsci.plugins.workflow.steps.FlowInterruptedException; -import org.jenkinsci.plugins.workflow.steps.durable_task.Messages; +import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepDynamicContext; import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepExecution; -import org.kohsuke.accmod.Restricted; -import org.kohsuke.accmod.restrictions.NoExternalUse; /** * Persists an {@link Executor} as the {@link hudson.model.Queue.Task} it was running. * That task can in turn have some way of producing a display name, a special {@link hudson.model.Queue.Executable} with a custom {@code executorCell.jelly}, and so on. * When rehydrated, the task is rescheduled, and when it starts executing the owning executor is produced. * Typically the {@link SubTask#getAssignedLabel} should be a {@link Node#getSelfLabel} so that the rehydrated executor is in fact on the same node. + * @deprecated Normally now done via {@link ExecutorStepDynamicContext}. */ +@Deprecated public class ExecutorPickle extends Pickle { private static final Logger LOGGER = Logger.getLogger(ExecutorPickle.class.getName()); private final Queue.Task task; - @SuppressFBWarnings(value = "MS_SHOULD_BE_FINAL", justification = "deliberately mutable") - @Restricted(NoExternalUse.class) - public static long TIMEOUT_WAITING_FOR_NODE_MILLIS = Main.isUnitTest ? /* fail faster */ TimeUnit.SECONDS.toMillis(15) : Long.getLong(ExecutorPickle.class.getName()+".timeoutForNodeMillis", TimeUnit.MINUTES.toMillis(5)); - private ExecutorPickle(Executor executor) { if (executor instanceof OneOffExecutor) { throw new IllegalArgumentException("OneOffExecutor not currently supported"); @@ -105,7 +98,7 @@ protected Executor tryResolve() throws Exception { throw new IllegalStateException("queue refused " + task); } itemID = item.getId(); - endTimeNanos = System.nanoTime() + TIMEOUT_WAITING_FOR_NODE_MILLIS*1000000; + endTimeNanos = System.nanoTime() + ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS*1000000; LOGGER.log(Level.FINE, "{0} scheduled {1}", new Object[] {ExecutorPickle.this, item}); } else { item = Queue.getInstance().getItem(itemID); @@ -129,7 +122,7 @@ protected Executor tryResolve() throws Exception { if (System.nanoTime() > endTimeNanos) { Queue.getInstance().cancel(item); owner.getListener().getLogger().printf("Killed %s after waiting for %s because we assume unknown agent %s is never going to appear%n", - item.task.getDisplayName(), Util.getTimeSpanString(TIMEOUT_WAITING_FOR_NODE_MILLIS), placeholder.getAssignedLabel()); + item.task.getDisplayName(), Util.getTimeSpanString(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS), placeholder.getAssignedLabel()); throw new FlowInterruptedException(Result.ABORTED, new ExecutorStepExecution.RemovedNodeCause()); } } @@ -159,7 +152,7 @@ protected Executor tryResolve() throws Exception { } @Override protected void printWaitingMessage(@NonNull TaskListener listener) { Queue.Item item = Queue.getInstance().getItem(itemID); - String message = Messages.ExecutorPickle_waiting_to_resume(task.getFullDisplayName()); + String message = "Waiting to resume " + task.getFullDisplayName(); if (item == null) { // ??? listener.getLogger().println(message); return; diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/FilePathPickle.java b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/FilePathPickle.java index a6d66045..a25032e9 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/FilePathPickle.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/FilePathPickle.java @@ -31,10 +31,14 @@ import org.jenkinsci.plugins.workflow.FilePathUtils; import org.jenkinsci.plugins.workflow.flow.FlowExecutionOwner; import org.jenkinsci.plugins.workflow.pickles.Pickle; +import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepDynamicContext; +import org.jenkinsci.plugins.workflow.support.steps.FilePathDynamicContext; /** * @author Kohsuke Kawaguchi + * @deprecated Normally now done via {@link ExecutorStepDynamicContext} or {@link FilePathDynamicContext}. */ +@Deprecated public class FilePathPickle extends Pickle { private final String slave; diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/WorkspaceListLeasePickle.java b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/WorkspaceListLeasePickle.java index 332c0de7..d1c0f56e 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/WorkspaceListLeasePickle.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/WorkspaceListLeasePickle.java @@ -35,7 +35,12 @@ import org.jenkinsci.plugins.workflow.FilePathUtils; import org.jenkinsci.plugins.workflow.flow.FlowExecutionOwner; import org.jenkinsci.plugins.workflow.pickles.Pickle; +import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepDynamicContext; +/** + * @deprecated Normally now done via {@link ExecutorStepDynamicContext}. + */ +@Deprecated public class WorkspaceListLeasePickle extends Pickle { // Could perhaps just store the FilePath directly (thus using FilePathPickle implicitly), but we need the Computer anyway for its WorkspaceList: diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContext.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContext.java new file mode 100644 index 00000000..c7dbe91c --- /dev/null +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContext.java @@ -0,0 +1,241 @@ +/* + * The MIT License + * + * Copyright 2021 CloudBees, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package org.jenkinsci.plugins.workflow.support.steps; + +import edu.umd.cs.findbugs.annotations.NonNull; +import edu.umd.cs.findbugs.annotations.Nullable; +import hudson.Extension; +import hudson.FilePath; +import hudson.Util; +import hudson.model.Computer; +import hudson.model.Executor; +import hudson.model.Node; +import hudson.model.Queue; +import hudson.model.Result; +import hudson.model.TaskListener; +import hudson.remoting.VirtualChannel; +import hudson.slaves.OfflineCause; +import hudson.slaves.WorkspaceList; +import java.io.IOException; +import java.io.Serializable; +import java.util.concurrent.CancellationException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.logging.Level; +import java.util.logging.Logger; +import jenkins.model.Jenkins; +import org.jenkinsci.plugins.workflow.FilePathUtils; +import org.jenkinsci.plugins.workflow.steps.DynamicContext; +import org.jenkinsci.plugins.workflow.steps.FlowInterruptedException; +import org.jenkinsci.plugins.workflow.steps.StepContext; +import org.jenkinsci.plugins.workflow.support.DefaultStepContext; +import org.kohsuke.accmod.Restricted; +import org.kohsuke.accmod.restrictions.NoExternalUse; + +/** + * Persistent representation for context of {@link ExecutorStepExecution}. + * Supersedes {@link FilePathDynamicContext} (never mind {@link org.jenkinsci.plugins.workflow.support.pickles.FilePathPickle}), + * {@link org.jenkinsci.plugins.workflow.support.pickles.ExecutorPickle}, + * {@link org.jenkinsci.plugins.workflow.support.pickles.ComputerPickle}, + * and {@link org.jenkinsci.plugins.workflow.support.pickles.WorkspaceListLeasePickle}. + */ +@Restricted(NoExternalUse.class) +public final class ExecutorStepDynamicContext implements Serializable { + + private static final Logger LOGGER = Logger.getLogger(ExecutorStepDynamicContext.class.getName()); + + private static final long serialVersionUID = 1; + + final @NonNull ExecutorStepExecution.PlaceholderTask task; + final @NonNull String node; + private final @NonNull String path; + /** Non-null after {@link #resume} if all goes well. */ + private transient @Nullable Executor executor; + /** Non-null after {@link #resume} if all goes well. */ + private transient @Nullable WorkspaceList.Lease lease; + + ExecutorStepDynamicContext(ExecutorStepExecution.PlaceholderTask task, WorkspaceList.Lease lease, Executor executor) { + this.task = task; + this.node = FilePathUtils.getNodeName(lease.path); + this.path = lease.path.getRemote(); + this.executor = executor; + this.lease = lease; + } + + void resume(StepContext context) throws Exception { + if (executor != null) { + throw new IllegalStateException("Already resumed"); + } + Queue.Item item = Queue.getInstance().schedule2(task, 0).getItem(); + if (item == null) { + // TODO should also report when !ScheduleResult.created, since that is arguably an error + throw new IllegalStateException("queue refused " + task); + } + LOGGER.fine(() -> "scheduled " + item + " for " + path + " on " + node); + TaskListener listener = context.get(TaskListener.class); + if (!node.isEmpty()) { // unlikely to be any delay for built-in node anyway + listener.getLogger().println("Waiting for reconnection of " + node + " before proceeding with build"); + } + Queue.Executable exec; + try { + exec = item.getFuture().getStartCondition().get(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); + } catch (TimeoutException x) { + listener.getLogger().println(node + " has been removed for " + Util.getTimeSpanString(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS) + ", assuming it is not coming back"); + throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.RemovedNodeCause()); + } catch (CancellationException x) { + LOGGER.log(Level.FINE, "ceased to wait for " + node, x); + throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.QueueTaskCancelled()); + } + executor = Executor.of(exec); + if (executor == null) { + // TODO this could happen as a race condition if the executable takes <1s to run; how could that be prevented? + // Or can we schedule a placeholder Task whose Executable does nothing but return Executor.currentExecutor and then end? + throw new IOException(exec + " was scheduled but no executor claimed it"); + } + Computer computer = executor.getOwner(); + VirtualChannel channel = computer.getChannel(); + if (channel == null) { + throw new IOException(computer + " is offline"); + } + FilePath fp = new FilePath(channel, path); + // Since there is no equivalent to Lock.tryLock for WorkspaceList (.record would work but throws AssertionError and swaps the holder): + WorkspaceList.Lease _lease = computer.getWorkspaceList().allocate(fp); + if (_lease.path.equals(fp)) { + lease = _lease; + } else { // @2 or other variant, not what we expected to be able to lock without contention + _lease.release(); + throw new IOException("JENKINS-37121: something already locked " + fp); + } + LOGGER.fine(() -> "fully restored for " + path + " on " + node); + } + + private static abstract class Translator extends DynamicContext.Typed { + + @Override protected T get(DelegatedContext context) throws IOException, InterruptedException { + ExecutorStepDynamicContext c = context.get(ExecutorStepDynamicContext.class); + if (c == null || c.lease == null) { + return null; + } + return get(c); + } + + abstract T get(ExecutorStepDynamicContext c) throws IOException, InterruptedException; + + } + + @Extension public static final class FilePathTranslator extends Translator { + + @Override protected Class type() { + return FilePath.class; + } + + @Override FilePath get(ExecutorStepDynamicContext c) throws IOException { + if (c.lease.path.toComputer() == null) { + FilePath f = FilePathUtils.find(c.node, c.path); + if (f != null) { + LOGGER.fine(() -> c.node + " disconnected and reconnected; getting a new FilePath on " + c.path + " with the new Channel"); + return f; + } + String message = "Unable to create live FilePath for " + c.node; + Computer comp = Jenkins.get().getComputer(c.node); + if (comp != null) { + OfflineCause oc = comp.getOfflineCause(); + if (oc != null) { + message += "; " + comp.getDisplayName() + " was marked offline: " + oc; + } + } + AgentOfflineException e = new AgentOfflineException(message); + if (comp != null) { + for (Computer.TerminationRequest tr : comp.getTerminatedBy()) { + e.addSuppressed(tr); + } + } + throw e; + } + return c.lease.path; + } + + } + + @Extension public static final class WorkspaceListLeaseTranslator extends Translator { + + @Override protected Class type() { + return WorkspaceList.Lease.class; + } + + @Override WorkspaceList.Lease get(ExecutorStepDynamicContext c) { + // Do not do a liveness check as in FilePathTranslator. + // We could not do anything about a stale .path even if we found out about it. + return c.lease; + } + + } + + @Extension public static final class ExecutorTranslator extends Translator { + + @Override protected Class type() { + return Executor.class; + } + + @Override Executor get(ExecutorStepDynamicContext c) { + return c.executor; + } + + } + + @Extension public static final class ComputerTranslator extends Translator { + + @Override protected Class type() { + return Computer.class; + } + + @Override Computer get(ExecutorStepDynamicContext c) { + return c.executor.getOwner(); + } + + } + + /** + * Need not use {@link Translator} since we can serve a {@link Node} even when offline. + * Overrides default behavior in {@link DefaultStepContext} which would delegate to {@link ComputerTranslator}. + */ + @Extension public static final class NodeTranslator extends DynamicContext.Typed { + + @Override protected Class type() { + return Node.class; + } + + @Override protected Node get(DelegatedContext context) throws IOException, InterruptedException { + ExecutorStepDynamicContext c = context.get(ExecutorStepDynamicContext.class); + if (c == null) { + return null; + } + Jenkins j = Jenkins.get(); + return c.node.isEmpty() ? j : j.getNode(c.node); + } + + } + +} diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java index 35524bf9..d00d213e 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java @@ -1,13 +1,18 @@ package org.jenkinsci.plugins.workflow.support.steps; +import com.google.common.util.concurrent.FutureCallback; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.MoreExecutors; import edu.umd.cs.findbugs.annotations.CheckForNull; import edu.umd.cs.findbugs.annotations.NonNull; import edu.umd.cs.findbugs.annotations.Nullable; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import hudson.EnvVars; import hudson.Extension; +import hudson.ExtensionList; import hudson.FilePath; import hudson.Launcher; +import hudson.Main; import hudson.Util; import hudson.console.ModelHyperlinkNote; import hudson.model.Computer; @@ -36,7 +41,6 @@ import hudson.slaves.WorkspaceList; import java.io.IOException; import java.io.Serializable; -import java.lang.ref.WeakReference; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -45,6 +49,7 @@ import java.util.Map; import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; import java.util.logging.Level; import static java.util.logging.Level.*; import java.util.logging.Logger; @@ -63,6 +68,7 @@ import org.jenkinsci.plugins.workflow.actions.LabelAction; import org.jenkinsci.plugins.workflow.actions.QueueItemAction; import org.jenkinsci.plugins.workflow.actions.ThreadNameAction; +import org.jenkinsci.plugins.workflow.flow.FlowExecution; import org.jenkinsci.plugins.workflow.flow.FlowExecutionList; import org.jenkinsci.plugins.workflow.graph.FlowNode; import org.jenkinsci.plugins.workflow.steps.AbstractStepExecutionImpl; @@ -70,11 +76,10 @@ import org.jenkinsci.plugins.workflow.steps.BodyExecutionCallback; import org.jenkinsci.plugins.workflow.steps.FlowInterruptedException; import org.jenkinsci.plugins.workflow.steps.StepContext; +import org.jenkinsci.plugins.workflow.steps.StepExecution; import org.jenkinsci.plugins.workflow.steps.durable_task.Messages; import org.jenkinsci.plugins.workflow.support.actions.WorkspaceActionImpl; import org.jenkinsci.plugins.workflow.support.concurrent.Timeout; -import org.jenkinsci.plugins.workflow.support.pickles.ExecutorPickle; -import org.jenkinsci.plugins.workflow.support.pickles.WorkspaceListLeasePickle; import org.kohsuke.accmod.Restricted; import org.kohsuke.accmod.restrictions.DoNotUse; import org.kohsuke.accmod.restrictions.NoExternalUse; @@ -84,7 +89,17 @@ public class ExecutorStepExecution extends AbstractStepExecutionImpl { + @SuppressFBWarnings(value = "MS_SHOULD_BE_FINAL", justification = "deliberately mutable") + @Restricted(value = NoExternalUse.class) + public static long TIMEOUT_WAITING_FOR_NODE_MILLIS = Main.isUnitTest ? /* fail faster */ TimeUnit.SECONDS.toMillis(15) : Long.getLong("org.jenkinsci.plugins.workflow.support.pickles.ExecutorPickle.timeoutForNodeMillis", TimeUnit.MINUTES.toMillis(5)); + private final ExecutorStep step; + private ExecutorStepDynamicContext state; + + /** + * Needed for {@link BodyExecution#cancel} in certain scenarios. + */ + private @CheckForNull BodyExecution body; ExecutorStepExecution(StepContext context, ExecutorStep step) { super(context); @@ -182,35 +197,13 @@ public void stop(@NonNull Throwable cause) throws Exception { } @Override public void onResume() { - super.onResume(); - // See if we are still running, or scheduled to run. Cf. stop logic above. try { - Run run = getContext().get(Run.class); - for (Queue.Item item : Queue.getInstance().getItems()) { - if (item.task instanceof PlaceholderTask && ((PlaceholderTask) item.task).context.equals(getContext())) { - LOGGER.log(FINE, "Queue item for node block in {0} is still waiting after reload", run); - return; - } - } - Jenkins j = Jenkins.getInstanceOrNull(); - if (j != null) { - for (Computer c : j.getComputers()) { - for (Executor e : c.getExecutors()) { - Queue.Executable exec = e.getCurrentExecutable(); - if (exec instanceof PlaceholderTask.PlaceholderExecutable && ((PlaceholderTask.PlaceholderExecutable) exec).getParent().context.equals(getContext())) { - LOGGER.log(FINE, "Node block in {0} is running on {1} after reload", new Object[] {run, c.getName()}); - return; - } - } - } - } - TaskListener listener = getContext().get(TaskListener.class); - if (step == null) { // compatibility: used to be transient - listener.getLogger().println("Queue item for node block in " + run.getFullDisplayName() + " is missing (perhaps JENKINS-34281), but cannot reschedule"); + if (state == null) { + Run run = getContext().get(Run.class); + LOGGER.fine(() -> "No ExecutorStepDynamicContext found for node block in " + run + "; perhaps loading from a historical build record, hoping for the best"); return; } - listener.getLogger().println("Queue item for node block in " + run.getFullDisplayName() + " is missing (perhaps JENKINS-34281); rescheduling"); - start(); + state.resume(getContext()); } catch (Exception x) { // JENKINS-40161 getContext().onFailure(x); } @@ -275,26 +268,29 @@ public static final class QueueTaskCancelled extends CauseOfInterruption { Queue.Executable exec = e.getCurrentExecutable(); if (exec instanceof PlaceholderTask.PlaceholderExecutable) { PlaceholderTask task = ((PlaceholderTask.PlaceholderExecutable) exec).getParent(); - TaskListener listener = TaskListener.NULL; + TaskListener listener; try { listener = task.context.get(TaskListener.class); } catch (Exception x) { LOGGER.log(Level.WARNING, null, x); - } - BodyExecution body = task.body != null ? task.body.get() : null; - if (body == null) { - listener.getLogger().println("Agent " + node.getNodeName() + " was deleted, but do not have a node body to cancel"); continue; } - listener.getLogger().println("Agent " + node.getNodeName() + " was deleted; cancelling node body"); - if (Util.isOverridden(BodyExecution.class, body.getClass(), "cancel", Throwable.class)) { - body.cancel(new FlowInterruptedException(Result.ABORTED, false, new RemovedNodeCause())); - } else { // TODO remove once https://github.com/jenkinsci/workflow-cps-plugin/pull/570 is widely deployed - body.cancel(new RemovedNodeCause()); - } + task.withExecution(execution -> { + BodyExecution body = execution.body; + if (body == null) { + listener.getLogger().println("Agent " + node.getNodeName() + " was deleted, but do not have a node body to cancel"); + return; + } + listener.getLogger().println("Agent " + node.getNodeName() + " was deleted; cancelling node body"); + if (Util.isOverridden(BodyExecution.class, body.getClass(), "cancel", Throwable.class)) { + body.cancel(new FlowInterruptedException(Result.ABORTED, false, new RemovedNodeCause())); + } else { // TODO remove once https://github.com/jenkinsci/workflow-cps-plugin/pull/570 is widely deployed + body.cancel(new RemovedNodeCause()); + } + }); } } - }, ExecutorPickle.TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); + }, TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); } } @@ -336,17 +332,6 @@ public static final class PlaceholderTask implements ContinuedTask, Serializable */ private String cookie; - /** - * Needed for {@link BodyExecution#cancel}. - * {@code transient} because we cannot save a {@link BodyExecution} in {@link PlaceholderTask}: - * {@link ExecutorPickle} is written to the stream first, which holds a {@link PlaceholderTask}, - * and the {@link BodyExecution} holds {@link PlaceholderTask.Callback} whose {@link WorkspaceList.Lease} - * is not processed by {@link WorkspaceListLeasePickle} since pickles are not recursive. - * So we make a best effort and only try to cancel a body within the current session. - * @see RemovedNodeListener - */ - private transient @CheckForNull WeakReference body; - /** {@link Authentication#getName} of user of build, if known. */ private final @CheckForNull String auth; @@ -376,6 +361,30 @@ private Object readResolve() { return this; } + /** + * We cannot keep {@link ExecutorStepExecution} as a serial field of {@link PlaceholderTask} + * since it could not be serialized via XStream in {@link Queue}. + * Instead we keep only {@link #context} and look up the execution as needed. + */ + private void withExecution(Consumer executionCallback) { + try { + Futures.addCallback(context.get(FlowExecution.class).getCurrentExecutions(false), new FutureCallback>() { + @Override public void onSuccess(List result) { + for (StepExecution execution : result) { + if (execution instanceof ExecutorStepExecution && execution.getContext().equals(context)) { + executionCallback.accept((ExecutorStepExecution) execution); + } + } + } + @Override public void onFailure(Throwable x) { + LOGGER.log(Level.WARNING, null, x); + } + }, MoreExecutors.directExecutor()); + } catch (IOException | InterruptedException x) { + LOGGER.log(Level.WARNING, null, x); + } + } + /** * Gives {@link FlowNode}, waiting to be executed in build {@link Queue}. * @@ -528,7 +537,7 @@ public String getShortDescription() { } return context.get(Run.class); } catch (Exception x) { - LOGGER.log(FINE, "broken " + cookie, x); + LOGGER.log(FINE, "broken " + cookie + " in " + runId, x); finish(cookie); // probably broken, so just shut it down return null; } @@ -734,7 +743,7 @@ private String computeEnclosingLabel(FlowNode executorStepNode, List h @Override public Authentication authenticate(Queue.Task task) { if (task instanceof PlaceholderTask) { String auth = ((PlaceholderTask) task).auth; - LOGGER.log(FINE, "authenticating {0}", task); + LOGGER.finer(() -> "authenticating " + task); if (Jenkins.ANONYMOUS.getName().equals(auth)) { return Jenkins.ANONYMOUS; } else if (auth != null) { @@ -793,19 +802,45 @@ private static void finish(@CheckForNull final String cookie) { @SuppressFBWarnings(value="SE_BAD_FIELD", justification="lease is pickled") private static final class Callback extends BodyExecutionCallback.TailCall { + private static final long serialVersionUID = -1357584128994454363L; + private final String cookie; + @Deprecated private WorkspaceList.Lease lease; + private final ExecutorStepExecution execution; - Callback(String cookie, WorkspaceList.Lease lease) { + Callback(String cookie, ExecutorStepExecution execution) { this.cookie = cookie; - this.lease = lease; + this.execution = execution; } @Override protected void finished(StepContext context) throws Exception { LOGGER.log(FINE, "finished {0}", cookie); - lease.release(); - lease = null; - finish(cookie); + try { + if (execution != null) { + WorkspaceList.Lease _lease = ExtensionList.lookupSingleton(ExecutorStepDynamicContext.WorkspaceListLeaseTranslator.class).get(execution.state); + if (_lease != null) { + _lease.release(); + } + } else { + lease.release(); + lease = null; + } + } finally { + finish(cookie); + } + if (execution != null) { + execution.body = null; + boolean _stopping = execution.state.task.stopping; + execution.state.task.stopping = true; + try { + Queue.getInstance().cancel(execution.state.task); + } finally { + execution.state.task.stopping = _stopping; + } + execution.state = null; + context.saveState(); + } } } @@ -838,7 +873,7 @@ private final class PlaceholderExecutable implements ContinuableExecutable, Acce if (cookie == null) { // First time around. cookie = UUID.randomUUID().toString(); - // Switches the label to a self-label, so if the executable is killed and restarted via ExecutorPickle, it will run on the same node: + // Switches the label to a self-label, so if the executable is killed and restarted, it will run on the same node: label = computer.getName(); EnvVars env = computer.getEnvironment(); @@ -878,15 +913,19 @@ private final class PlaceholderExecutable implements ContinuableExecutable, Acce flowNode.addAction(new WorkspaceActionImpl(workspace, flowNode)); } listener.getLogger().println("Running on " + ModelHyperlinkNote.encodeTo(node) + " in " + workspace); - body = new WeakReference<>(context.newBodyInvoker() - .withContexts(exec, computer, env, - FilePathDynamicContext.createContextualObject(workspace)) - .withCallback(new Callback(cookie, lease)) - .start()); - LOGGER.log(FINE, "started {0}", cookie); + ExecutorStepDynamicContext state = new ExecutorStepDynamicContext(PlaceholderTask.this, lease, exec); + withExecution(execution -> { + execution.state = state; + execution.body = context.newBodyInvoker() + .withContexts(env, state) + .withCallback(new Callback(cookie, execution)) + .start(); + LOGGER.fine(() -> "started " + cookie + " in " + runId); + context.saveState(); + }); } else { // just rescheduled after a restart; wait for task to complete - LOGGER.log(FINE, "resuming {0}", cookie); + LOGGER.fine(() -> "resuming " + cookie + " in " + runId); } } catch (Exception x) { if (computer != null) { @@ -905,10 +944,10 @@ private final class PlaceholderExecutable implements ContinuableExecutable, Acce } // wait until the invokeBodyLater call above completes and notifies our Callback object synchronized (runningTasks) { - LOGGER.log(FINE, "waiting on {0}", cookie); + LOGGER.fine(() -> "waiting on " + cookie + " in " + runId); RunningTask runningTask = runningTasks.get(cookie); if (runningTask == null) { - LOGGER.log(FINE, "running task apparently finished quickly for {0}", cookie); + LOGGER.fine(() -> "running task apparently finished quickly for " + cookie + " in " + runId); return; } assert runningTask.execution == null; @@ -920,19 +959,20 @@ private final class PlaceholderExecutable implements ContinuableExecutable, Acce if (forShutdown) { return; } - LOGGER.log(FINE, "interrupted {0}", cookie); - // TODO save the BodyExecution somehow and call .cancel() here; currently we just interrupt the build as a whole: + LOGGER.fine(() -> "interrupted " + cookie + " in " + runId); Timer.get().submit(() -> { // JENKINS-46738 - Executor masterExecutor = r.getExecutor(); - if (masterExecutor != null) { - masterExecutor.interrupt(); - } else { // anomalous state; perhaps build already aborted but this was left behind; let user manually cancel executor slot - Executor thisExecutor = /* AsynchronousExecution. */getExecutor(); - if (thisExecutor != null) { - thisExecutor.recordCauseOfInterruption(r, _listener); + Executor thisExecutor = /* AsynchronousExecution. */ getExecutor(); + withExecution(execution -> { + BodyExecution body = execution.body; + if (body != null) { + body.cancel(thisExecutor != null ? thisExecutor.getCausesOfInterruption().toArray(new CauseOfInterruption[0]) : new CauseOfInterruption[0]); + } else { // anomalous state; perhaps build already aborted but this was left behind; let user manually cancel executor slot + if (thisExecutor != null) { + thisExecutor.recordCauseOfInterruption(r, _listener); + } + completed(null); } - completed(null); - } + }); }); } @Override public boolean blocksRestart() { diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/FilePathDynamicContext.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/FilePathDynamicContext.java index ac7ac83c..9870e11a 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/FilePathDynamicContext.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/FilePathDynamicContext.java @@ -45,7 +45,7 @@ * Allows a step body to save a representation of a workspace * without forcing a particular {@link FilePath#getChannel} to be used the whole time. */ -@Extension public final class FilePathDynamicContext extends DynamicContext.Typed { +@Extension(ordinal = 100) public final class FilePathDynamicContext extends DynamicContext.Typed { private static final Logger LOGGER = Logger.getLogger(FilePathDynamicContext.class.getName()); diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/WorkspaceStepExecution.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/WorkspaceStepExecution.java index 2d2ca789..81968320 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/WorkspaceStepExecution.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/WorkspaceStepExecution.java @@ -13,6 +13,8 @@ import hudson.slaves.WorkspaceList; import java.util.HashMap; import java.util.Map; +import java.util.logging.Logger; +import org.jenkinsci.plugins.workflow.FilePathUtils; import org.jenkinsci.plugins.workflow.graph.FlowNode; import org.jenkinsci.plugins.workflow.steps.AbstractStepExecutionImpl; import org.jenkinsci.plugins.workflow.steps.BodyExecutionCallback; @@ -22,6 +24,8 @@ public class WorkspaceStepExecution extends AbstractStepExecutionImpl { + private static final Logger LOGGER = Logger.getLogger(WorkspaceStepExecution.class.getName()); + @SuppressFBWarnings(value="SE_TRANSIENT_FIELD_NOT_RESTORED", justification="only used from #start") private transient final String dir; @@ -87,19 +91,45 @@ private ExpanderImpl(String path) { } } - @SuppressFBWarnings(value="SE_BAD_FIELD", justification="lease is pickled") private static final class Callback extends BodyExecutionCallback.TailCall { - private final WorkspaceList.Lease lease; + private transient WorkspaceList.Lease lease; + private final String node; + private final String path; Callback(WorkspaceList.Lease lease) { this.lease = lease; + node = FilePathUtils.getNodeName(lease.path); + path = lease.path.getRemote(); } @Override protected void finished(StepContext context) throws Exception { + if (lease == null) { // after restart, unless using historical pickled version + FilePath fp = FilePathUtils.find(node, path); + if (fp == null) { + LOGGER.fine(() -> "can no longer find " + path + " on " + node + " to release"); + // Should be harmless: no one could be waiting for a lock if the agent is not even online anyway. + return; + } + LOGGER.fine(() -> "recreating lease on " + fp); + Computer c = fp.toComputer(); + if (c == null) { + LOGGER.warning(() -> node + " no longer connected"); + return; + } + // See ExecutorStepDynamicContext for background: + lease = c.getWorkspaceList().allocate(fp); + if (!lease.path.equals(fp)) { + lease.release(); + LOGGER.warning(() -> "JENKINS-37121: something already locked " + fp + " != " + lease.path); + return; + } + } lease.release(); } + private static final long serialVersionUID = 525857611466436091L; + } private static final long serialVersionUID = 1L; diff --git a/src/main/resources/org/jenkinsci/plugins/workflow/steps/durable_task/Messages.properties b/src/main/resources/org/jenkinsci/plugins/workflow/steps/durable_task/Messages.properties index 7d097373..191bb337 100644 --- a/src/main/resources/org/jenkinsci/plugins/workflow/steps/durable_task/Messages.properties +++ b/src/main/resources/org/jenkinsci/plugins/workflow/steps/durable_task/Messages.properties @@ -20,7 +20,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. -ExecutorPickle.waiting_to_resume=Waiting to resume {0} ExecutorStepExecution.PlaceholderTask.displayName=part of {0} ExecutorStepExecution.PlaceholderTask.displayName_label={0} ({1}) ExecutorStepExecution.queue_task_cancelled=Queue task was cancelled diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickleTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickleTest.java deleted file mode 100644 index cafeabb9..00000000 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickleTest.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2016, CloudBees, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -package org.jenkinsci.plugins.workflow.support.pickles; - -import hudson.model.Item; -import hudson.model.Label; -import hudson.model.Queue; -import hudson.model.Result; -import hudson.model.User; -import hudson.slaves.DumbSlave; -import hudson.slaves.OfflineCause; -import hudson.slaves.RetentionStrategy; -import jenkins.model.InterruptedBuildAction; -import jenkins.model.Jenkins; -import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition; -import org.jenkinsci.plugins.workflow.job.WorkflowJob; -import org.jenkinsci.plugins.workflow.job.WorkflowRun; -import org.jenkinsci.plugins.workflow.steps.durable_task.Messages; -import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepExecution; -import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep; -import static org.junit.Assert.*; - -import org.junit.Assert; -import org.junit.Test; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.rules.TemporaryFolder; -import org.jvnet.hudson.test.BuildWatcher; -import org.jvnet.hudson.test.Issue; -import org.jvnet.hudson.test.MockAuthorizationStrategy; -import org.jvnet.hudson.test.JenkinsSessionRule; - -import java.io.InterruptedIOException; -import java.util.Collections; -import java.util.stream.Collectors; - -public class ExecutorPickleTest { - - @ClassRule public static BuildWatcher buildWatcher = new BuildWatcher(); - @Rule public JenkinsSessionRule sessions = new JenkinsSessionRule(); - @Rule public TemporaryFolder tmp = new TemporaryFolder(); - //@Rule public LoggerRule logging = new LoggerRule().record(Queue.class, Level.FINE); - - @Test public void canceledQueueItem() throws Throwable { - sessions.then(j -> { - DumbSlave s = j.createSlave(Label.get("remote")); - WorkflowJob p = j.createProject(WorkflowJob.class, "p"); - p.setDefinition(new CpsFlowDefinition("node('remote') {semaphore 'wait'}", true)); - WorkflowRun b = p.scheduleBuild2(0).waitForStart(); - SemaphoreStep.waitForStart("wait/1", b); - j.jenkins.removeNode(s); - }); - sessions.then(j -> { - SemaphoreStep.success("wait/1", null); - WorkflowRun b = j.jenkins.getItemByFullName("p", WorkflowJob.class).getBuildByNumber(1); - // first prints on 2.35-: hudson.model.Messages.Queue_WaitingForNextAvailableExecutor(); 2.36+: hudson.model.Messages.Node_LabelMissing("Jenkins", "slave0") - j.waitForMessage(Messages.ExecutorPickle_waiting_to_resume(Messages.ExecutorStepExecution_PlaceholderTask_displayName(b.getFullDisplayName())), b); - Queue.Item[] items = Queue.getInstance().getItems(); - assertEquals(1, items.length); - Queue.getInstance().cancel(items[0]); - j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(b)); - InterruptedBuildAction iba = b.getAction(InterruptedBuildAction.class); - assertNotNull(iba); - assertEquals(Collections.singleton(ExecutorStepExecution.QueueTaskCancelled.class), iba.getCauses().stream().map(Object::getClass).collect(Collectors.toSet())); - }); - } - - @Issue("JENKINS-42556") - @Test public void anonDiscover() throws Throwable { - sessions.then(j -> { - j.jenkins.setSecurityRealm(j.createDummySecurityRealm()); - j.jenkins.setAuthorizationStrategy(new MockAuthorizationStrategy(). - grant(Jenkins.ADMINISTER).everywhere().to("admin"). - grant(Jenkins.READ, Item.DISCOVER).everywhere().toEveryone()); - DumbSlave remote = j.createSlave("remote", null, null); - WorkflowJob p = j.createProject(WorkflowJob.class, "p"); - p.setDefinition(new CpsFlowDefinition("node('remote') {semaphore 'wait'}", true)); - SemaphoreStep.waitForStart("wait/1", p.scheduleBuild2(0).waitForStart()); - remote.toComputer().setTemporarilyOffline(true, new OfflineCause.UserCause(User.getById("admin", true), "hold")); - }); - sessions.then(j -> { - SemaphoreStep.success("wait/1", null); - WorkflowJob p = j.jenkins.getItemByFullName("p", WorkflowJob.class); - assertFalse(p.getACL().hasPermission(Jenkins.ANONYMOUS, Item.READ)); - WorkflowRun b = p.getBuildByNumber(1); - j.waitForMessage(Messages.ExecutorPickle_waiting_to_resume(Messages.ExecutorStepExecution_PlaceholderTask_displayName(b.getFullDisplayName())), b); - j.jenkins.getNode("remote").toComputer().setTemporarilyOffline(false, null); - j.assertBuildStatusSuccess(j.waitForCompletion(b)); - }); - } - - /** - * Test that {@link ExecutorPickle} won't spin forever trying to rehydrate if it was using an - * node that disappeared and will never reappear... but still waits a little bit to find out. - * - * I.E. cases where the {@link RetentionStrategy} is {@link RetentionStrategy#NOOP}. - */ - @Issue("JENKINS-36013") - @Test public void normalNodeDisappearance() throws Throwable { - sessions.then(j -> { - // Start up a build that needs executor and then reboot and take the node offline - // Starting job first ensures we don't immediately fail if Node comes from a Cloud - // and takes a min to provision - WorkflowJob p = j.createProject(WorkflowJob.class, "p"); - p.setDefinition(new CpsFlowDefinition("node('ghost') {semaphore 'wait'}", true)); - - DumbSlave s = j.createSlave(Label.get("ghost")); - System.out.println("Agent launched, waiting for semaphore"); - SemaphoreStep.waitForStart("wait/1", p.scheduleBuild2(0).waitForStart()); - j.jenkins.removeNode(s); - }); - - sessions.then(j -> { - // Start up a build and then reboot and take the node offline - assertEquals(0, j.jenkins.getLabel("ghost").getNodes().size()); // Make sure test impl is correctly deleted - assertNull(j.jenkins.getNode("ghost")); // Make sure test impl is correctly deleted - WorkflowRun run = j.jenkins.getItemByFullName("p", WorkflowJob.class).getLastBuild(); - j.waitForMessage("Waiting to resume", run); - Thread.sleep(1000L); - Assert.assertTrue(run.isBuilding()); - Assert.assertEquals("Queue should still have single build Item waiting to resume but didn't", 1, Queue.getInstance().getItems().length); - - try { - Thread.sleep(ExecutorPickle.TIMEOUT_WAITING_FOR_NODE_MILLIS + 1000L); - Assert.assertEquals("Should have given up and killed the Task representing the resuming build", 0, Queue.getInstance().getItems().length ); - Assert.assertFalse(run.isBuilding()); - j.assertBuildStatus(Result.ABORTED, run); - Assert.assertEquals(0, j.jenkins.getQueue().getItems().length); - InterruptedBuildAction iba = run.getAction(InterruptedBuildAction.class); - assertNotNull(iba); - assertEquals(Collections.singleton(ExecutorStepExecution.RemovedNodeCause.class), iba.getCauses().stream().map(Object::getClass).collect(Collectors.toSet())); - } catch (InterruptedIOException ioe) { - throw new AssertionError("Waited for build to detect loss of node and it didn't!", ioe); - } finally { - if (run.isBuilding()) { - run.doKill(); - } - } - }); - } - -} diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorConditionTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorConditionTest.java index 00374984..aa7768fe 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorConditionTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorConditionTest.java @@ -56,7 +56,6 @@ import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep; import static org.junit.Assert.assertEquals; import org.junit.ClassRule; -import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.jvnet.hudson.test.BuildWatcher; @@ -249,7 +248,6 @@ public static final class HangStep extends Step { }); } - @Ignore("TODO pending https://github.com/jenkinsci/workflow-durable-task-step-plugin/pull/180") @Test public void retryNewStepAcrossRestarts() throws Throwable { logging.record(DurableTaskStep.class, Level.FINE).record(FileMonitoringTask.class, Level.FINE).record(ExecutorStepExecution.class, Level.FINE); sessions.then(r -> { @@ -282,7 +280,6 @@ public static final class HangStep extends Step { }); } - @Ignore("TODO pending https://github.com/jenkinsci/workflow-durable-task-step-plugin/pull/180") @Issue("JENKINS-30383") @Test public void retryNodeBlockSynchAcrossRestarts() throws Throwable { logging.record(ExecutorStepExecution.class, Level.FINE).record(FlowExecutionList.class, Level.FINE); diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java new file mode 100644 index 00000000..2161f790 --- /dev/null +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java @@ -0,0 +1,164 @@ +/* + * The MIT License + * + * Copyright (c) 2016, CloudBees, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package org.jenkinsci.plugins.workflow.support.steps; + +import hudson.model.Label; +import hudson.model.Queue; +import hudson.model.Result; +import hudson.slaves.DumbSlave; +import hudson.slaves.RetentionStrategy; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import jenkins.model.InterruptedBuildAction; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.emptyArray; +import static org.hamcrest.Matchers.isA; +import static org.hamcrest.Matchers.anyOf; +import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition; +import org.jenkinsci.plugins.workflow.flow.FlowExecutionList; +import org.jenkinsci.plugins.workflow.job.WorkflowJob; +import org.jenkinsci.plugins.workflow.job.WorkflowRun; +import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.jvnet.hudson.test.BuildWatcher; +import org.jvnet.hudson.test.Issue; +import org.jvnet.hudson.test.JenkinsSessionRule; +import org.jvnet.hudson.test.LoggerRule; + +public class ExecutorStepDynamicContextTest { + + @ClassRule public static BuildWatcher buildWatcher = new BuildWatcher(); + @Rule public JenkinsSessionRule sessions = new JenkinsSessionRule(); + @Rule public TemporaryFolder tmp = new TemporaryFolder(); + @Rule public LoggerRule logging = new LoggerRule(); + + @Test public void canceledQueueItem() throws Throwable { + sessions.then(j -> { + DumbSlave s = j.createSlave(Label.get("remote")); + WorkflowJob p = j.createProject(WorkflowJob.class, "p"); + p.setDefinition(new CpsFlowDefinition("node('remote') {semaphore 'wait'; isUnix()}", true)); + WorkflowRun b = p.scheduleBuild2(0).waitForStart(); + SemaphoreStep.waitForStart("wait/1", b); + j.jenkins.removeNode(s); + }); + sessions.then(j -> { + SemaphoreStep.success("wait/1", null); + WorkflowRun b = j.jenkins.getItemByFullName("p", WorkflowJob.class).getBuildByNumber(1); + while (Queue.getInstance().getItems().length == 0) { + Thread.sleep(100); + } + Queue.Item[] items = Queue.getInstance().getItems(); + assertEquals(1, items.length); + Queue.getInstance().cancel(items[0]); + j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(b)); + InterruptedBuildAction iba = b.getAction(InterruptedBuildAction.class); + assertNotNull(iba); + assertThat(iba.getCauses(), contains(anyOf( + isA(ExecutorStepExecution.QueueTaskCancelled.class), // normal + isA(ExecutorStepExecution.RemovedNodeCause.class)))); // observed on occasion + }); + } + + /** + * Test that a build will not spin forever trying to resume if it was using an + * node that disappeared and will never reappear... but still waits a little bit to find out. + * + * I.E. cases where the {@link RetentionStrategy} is {@link RetentionStrategy#NOOP}. + */ + @Issue("JENKINS-36013") + @Test public void normalNodeDisappearance() throws Throwable { + logging.recordPackage(ExecutorStepExecution.class, Level.FINE).record(FlowExecutionList.class, Level.FINE); + sessions.then(j -> { + // Start up a build that needs executor and then reboot and take the node offline + // Starting job first ensures we don't immediately fail if Node comes from a Cloud + // and takes a min to provision + WorkflowJob p = j.createProject(WorkflowJob.class, "p"); + p.setDefinition(new CpsFlowDefinition("node('ghost') {if (isUnix()) {sh 'sleep infinity'} else {bat 'echo + sleep infinity && ping -n 999999 localhost'}}", true)); + + DumbSlave s = j.createSlave(Label.get("ghost")); + j.waitForMessage("+ sleep infinity", p.scheduleBuild2(0).waitForStart()); + j.jenkins.removeNode(s); + }); + + sessions.then(j -> { + // Start up a build and then reboot and take the node offline + assertEquals(0, j.jenkins.getLabel("ghost").getNodes().size()); // Make sure test impl is correctly deleted + assertNull(j.jenkins.getNode("ghost")); // Make sure test impl is correctly deleted + WorkflowRun run = j.jenkins.getItemByFullName("p", WorkflowJob.class).getLastBuild(); + j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(run)); + j.assertLogContains("slave0 has been removed for ", run); + assertThat(j.jenkins.getQueue().getItems(), emptyArray()); + InterruptedBuildAction iba = run.getAction(InterruptedBuildAction.class); + assertNotNull(iba); + assertThat(iba.getCauses(), contains(isA(ExecutorStepExecution.RemovedNodeCause.class))); + }); + } + + @Issue("JENKINS-36013") + @Test public void parallelNodeDisappearance() throws Throwable { + logging.recordPackage(ExecutorStepExecution.class, Level.FINE).record(FlowExecutionList.class, Level.FINE); + sessions.then(j -> { + WorkflowJob p = j.createProject(WorkflowJob.class, "p"); + p.setDefinition(new CpsFlowDefinition("def bs = [:]; for (int _i = 0; _i < 5; _i++) {def i = _i; bs[/b$i/] = {node('remote') {semaphore(/s$i/)}}}; parallel bs", true)); + List agents = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + agents.add(j.createSlave(Label.get("remote"))); + } + WorkflowRun b = p.scheduleBuild2(0).waitForStart(); + for (int i = 0; i < 5; i++) { + SemaphoreStep.waitForStart("s" + i + "/1", b); + } + for (DumbSlave agent : agents) { + j.jenkins.removeNode(agent); + } + }); + sessions.then(j -> { + logging.record(Queue.class, Level.INFO).capture(100); + for (int i = 0; i < 5; i++) { + SemaphoreStep.success("s" + i + "/1", null); + } + WorkflowRun b = j.jenkins.getItemByFullName("p", WorkflowJob.class).getLastBuild(); + // Verify that all the waiting happens in parallel, not serially: + for (int i = 0; i < 5; i++) { + j.waitForMessage("Waiting for reconnection of slave" + i + " before proceeding with build", b); + } + j.assertLogNotContains("assuming it is not coming back", b); + j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(b)); + for (int i = 0; i < 5; i++) { + j.assertLogContains("slave" + i + " has been removed for 15 sec, assuming it is not coming back", b); + } + assertThat(logging.getRecords().stream().filter(r -> r.getLevel().intValue() >= Level.WARNING.intValue()).toArray(), emptyArray()); + }); + } + +} diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepTest.java index aa91507c..61dc295b 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepTest.java @@ -27,7 +27,6 @@ import com.gargoylesoftware.htmlunit.Page; import com.google.common.base.Predicate; import edu.umd.cs.findbugs.annotations.Nullable; -import hudson.EnvVars; import hudson.FilePath; import hudson.Functions; import hudson.model.Computer; @@ -215,7 +214,7 @@ public class ExecutorStepTest { @Test public void buildShellScriptAcrossRestart() throws Throwable { Assume.assumeFalse("TODO not sure how to write a corresponding batch script", Functions.isWindows()); sessions.then(r -> { - logging.record(DurableTaskStep.class, Level.FINE).record(FileMonitoringTask.class, Level.FINE); + logging.record(DurableTaskStep.class, Level.FINE).record(FileMonitoringTask.class, Level.FINE).record(ExecutorStepDynamicContext.class, Level.FINE).record(ExecutorStepExecution.class, Level.FINE); DumbSlave s = r.createSlave("dumbo", null, null); WorkflowJob p = r.createProject(WorkflowJob.class, "demo"); File f1 = new File(r.jenkins.getRootDir(), "f1"); @@ -336,7 +335,7 @@ public void contextualizeFreshFilePathAfterAgentReconnection() throws Throwable Assume.assumeFalse("TODO not sure how to write a corresponding batch script", Functions.isWindows()); sessions.then(r -> { logging.record(DurableTaskStep.class, Level.FINE). - record(FilePathDynamicContext.class, Level.FINE). + record(ExecutorStepDynamicContext.class, Level.FINE). record(WorkspaceList.class, Level.FINE); Slave s = inboundAgents.createAgent(r, "dumbo"); WorkflowJob p = r.createProject(WorkflowJob.class, "demo"); @@ -449,6 +448,8 @@ private static void assertWorkspaceLocked(Computer computer, String workspacePat SemaphoreStep.waitForStart("wait/2", b2); assertTrue(b2.isBuilding()); }); + logging.record(WorkspaceStepExecution.class, Level.FINE); + logging.record(FilePathDynamicContext.class, Level.FINE); sessions.then(r -> { WorkflowJob p = (WorkflowJob) r.jenkins.getItem("demo"); WorkflowRun b = p.getLastBuild(); @@ -497,9 +498,9 @@ private static void assertLogMatches(WorkflowRun build, String regexp) throws IO } @Issue("JENKINS-26130") - @Test public void unloadableExecutorPickle() throws Throwable { + @Test public void unrestorableAgent() throws Throwable { sessions.then(r -> { - DumbSlave dumbo = r.createSlave("dumbo", null, null); // unlike in buildShellScriptAcrossRestart, we *want* this to die after restart + DumbSlave dumbo = r.createSlave("dumbo", null, null); WorkflowJob p = r.createProject(WorkflowJob.class, "p"); p.setDefinition(new CpsFlowDefinition( "node('dumbo') {\n" + @@ -513,11 +514,10 @@ private static void assertLogMatches(WorkflowRun build, String regexp) throws IO WorkflowJob p = r.jenkins.getItemByFullName("p", WorkflowJob.class); WorkflowRun b = p.getLastBuild(); assertTrue(b.isBuilding()); - r.waitForMessage(Messages.ExecutorPickle_waiting_to_resume(Messages.ExecutorStepExecution_PlaceholderTask_displayName(b.getFullDisplayName())), b); - r.waitForMessage(hudson.model.Messages.Queue_NodeOffline("dumbo"), b); - b.getExecutor().interrupt(); + SemaphoreStep.success("wait/1", null); r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b)); assertEquals(Collections.emptyList(), Arrays.asList(Queue.getInstance().getItems())); + r.assertLogContains("dumbo has been removed for 15 sec, assuming it is not coming back", b); }); }