diff --git a/src/main/java/com/google/devtools/build/lib/worker/JsonWorkerProtocol.java b/src/main/java/com/google/devtools/build/lib/worker/JsonWorkerProtocol.java index 3990a9bdc7518a..8c594515eedd26 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/JsonWorkerProtocol.java +++ b/src/main/java/com/google/devtools/build/lib/worker/JsonWorkerProtocol.java @@ -54,6 +54,17 @@ public void putRequest(WorkRequest request) throws IOException { @Override public WorkResponse getResponse() throws IOException { + boolean interrupted = Thread.interrupted(); + try { + return parseResponse(); + } finally { + if (interrupted) { + Thread.currentThread().interrupt(); + } + } + } + + private WorkResponse parseResponse() throws IOException { Integer exitCode = null; String output = null; Integer requestId = null; diff --git a/src/main/java/com/google/devtools/build/lib/worker/ProtoWorkerProtocol.java b/src/main/java/com/google/devtools/build/lib/worker/ProtoWorkerProtocol.java index 6eed2ca8463ada..199f24f7cf66ca 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/ProtoWorkerProtocol.java +++ b/src/main/java/com/google/devtools/build/lib/worker/ProtoWorkerProtocol.java @@ -41,7 +41,14 @@ public void putRequest(WorkRequest request) throws IOException { @Override public WorkResponse getResponse() throws IOException { - return WorkResponse.parseDelimitedFrom(workersStdout); + boolean interrupted = Thread.interrupted(); + try { + return WorkResponse.parseDelimitedFrom(workersStdout); + } finally { + if (interrupted) { + Thread.currentThread().interrupt(); + } + } } @Override diff --git a/src/main/java/com/google/devtools/build/lib/worker/SingleplexWorker.java b/src/main/java/com/google/devtools/build/lib/worker/SingleplexWorker.java index a8684d1c229931..2c06478b0733fa 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/SingleplexWorker.java +++ b/src/main/java/com/google/devtools/build/lib/worker/SingleplexWorker.java @@ -118,8 +118,28 @@ void putRequest(WorkRequest request) throws IOException { } @Override - WorkResponse getResponse(int requestId) throws IOException { + WorkResponse getResponse(int requestId) throws IOException, InterruptedException { recordingInputStream.startRecording(4096); + // Ironically, we don't allow interrupts during dynamic execution, since we can't cancel + // the worker short of destroying it. + if (!workerKey.isSpeculative()) { + while (recordingInputStream.available() == 0) { + try { + Thread.sleep(10); + } catch (InterruptedException e) { + // This should only happen when not in dynamic execution, so we can safely kill the + // worker. + destroy(); + throw e; + } + if (!process.isAlive()) { + throw new IOException( + String.format( + "Worker process for %s died while waiting for response", + workerKey.getMnemonic())); + } + } + } return workerProtocol.getResponse(); } diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java index 5088d50071c219..f05b8fe31c6296 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java @@ -60,7 +60,7 @@ public Worker create(WorkerKey key) { workerBaseDir.getRelative(workTypeName + "-" + workerId + "-" + key.getMnemonic() + ".log"); Worker worker; - boolean sandboxed = workerOptions.workerSandboxing || key.mustBeSandboxed(); + boolean sandboxed = workerOptions.workerSandboxing || key.isSpeculative(); if (sandboxed) { Path workDir = getSandboxedWorkerPath(key, workerId); worker = new SandboxedWorker(key, workerId, workDir, logFile); @@ -124,30 +124,18 @@ public boolean validateObject(WorkerKey key, PooledObject p) { Worker worker = p.getObject(); Optional exitValue = worker.getExitValue(); if (exitValue.isPresent()) { - if (workerOptions.workerVerbose) { - if (worker.diedUnexpectedly()) { - String msg = - String.format( - "%s %s (id %d) has unexpectedly died with exit code %d.", - key.getMnemonic(), - key.getWorkerTypeName(), - worker.getWorkerId(), - exitValue.get()); - ErrorMessage errorMessage = - ErrorMessage.builder() - .message(msg) - .logFile(worker.getLogFile()) - .logSizeLimit(4096) - .build(); - reporter.handle(Event.warn(errorMessage.toString())); - } else { - // Can't rule this out entirely, but it's not an unexpected death. - String msg = - String.format( - "%s %s (id %d) was destroyed, but is still in the worker pool.", - key.getMnemonic(), key.getWorkerTypeName(), worker.getWorkerId()); - reporter.handle(Event.info(msg)); - } + if (workerOptions.workerVerbose && worker.diedUnexpectedly()) { + String msg = + String.format( + "%s %s (id %d) has unexpectedly died with exit code %d.", + key.getMnemonic(), key.getWorkerTypeName(), worker.getWorkerId(), exitValue.get()); + ErrorMessage errorMessage = + ErrorMessage.builder() + .message(msg) + .logFile(worker.getLogFile()) + .logSizeLimit(4096) + .build(); + reporter.handle(Event.warn(errorMessage.toString())); } return false; } diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java index c4741d6ed85f4a..d04ecaba4f2f0a 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java @@ -32,9 +32,13 @@ * break various things as well as render the workers less useful. */ final class WorkerKey { + /** Build options. */ private final ImmutableList args; + /** Environment variables. */ private final ImmutableMap env; + /** Execution root of Bazel process. */ private final Path execRoot; + /** Mnemonic of the worker. */ private final String mnemonic; /** @@ -43,8 +47,10 @@ final class WorkerKey { * methods. */ private final HashCode workerFilesCombinedHash; + /** Worker files with the corresponding hash code. */ private final SortedMap workerFilesWithHashes; - private final boolean mustBeSandboxed; + /** Set it to true if this job is running speculatively and thus likely to be interrupted. */ + private final boolean isSpeculative; /** A WorkerProxy will be instantiated if true, instantiate a regular Worker if false. */ private final boolean proxied; /** @@ -52,7 +58,7 @@ final class WorkerKey { * (ImmutableMap and ImmutableList do not cache their hashcodes. */ private final int hash; - + /** The format of the worker protocol sent to and read from the worker. */ private final WorkerProtocolFormat protocolFormat; WorkerKey( @@ -62,26 +68,17 @@ final class WorkerKey { String mnemonic, HashCode workerFilesCombinedHash, SortedMap workerFilesWithHashes, - boolean mustBeSandboxed, + boolean isSpeculative, boolean proxied, WorkerProtocolFormat protocolFormat) { - /** Build options. */ this.args = Preconditions.checkNotNull(args); - /** Environment variables. */ this.env = Preconditions.checkNotNull(env); - /** Execution root of Bazel process. */ this.execRoot = Preconditions.checkNotNull(execRoot); - /** Mnemonic of the worker. */ this.mnemonic = Preconditions.checkNotNull(mnemonic); - /** One combined hash code for all files. */ this.workerFilesCombinedHash = Preconditions.checkNotNull(workerFilesCombinedHash); - /** Worker files with the corresponding hash code. */ this.workerFilesWithHashes = Preconditions.checkNotNull(workerFilesWithHashes); - /** Set it to true if this job should be run in sandbox. */ - this.mustBeSandboxed = mustBeSandboxed; - /** Set it to true if this job should be run with WorkerProxy. */ + this.isSpeculative = isSpeculative; this.proxied = proxied; - /** The format of the worker protocol sent to and read from the worker. */ this.protocolFormat = protocolFormat; hash = calculateHashCode(); @@ -117,9 +114,9 @@ public SortedMap getWorkerFilesWithHashes() { return workerFilesWithHashes; } - /** Getter function for variable mustBeSandboxed. */ - public boolean mustBeSandboxed() { - return mustBeSandboxed; + /** Returns true if workers are run speculatively. */ + public boolean isSpeculative() { + return isSpeculative; } /** Getter function for variable proxied. */ @@ -128,7 +125,7 @@ public boolean getProxied() { } public boolean isMultiplex() { - return getProxied() && !mustBeSandboxed; + return getProxied() && !isSpeculative; } /** Returns the format of the worker protocol. */ @@ -147,7 +144,7 @@ public static String makeWorkerTypeName(boolean proxied, boolean mustBeSandboxed /** Returns a user-friendly name for this worker type. */ public String getWorkerTypeName() { - return makeWorkerTypeName(proxied, mustBeSandboxed); + return makeWorkerTypeName(proxied, isSpeculative); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java index 43a3c058b6fdce..a34c9bd0fb8445 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java @@ -20,7 +20,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.eventbus.Subscribe; import com.google.devtools.build.lib.buildtool.buildevent.BuildCompleteEvent; -import com.google.devtools.build.lib.buildtool.buildevent.BuildInterruptedEvent; import com.google.devtools.build.lib.buildtool.buildevent.BuildStartingEvent; import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.exec.ExecutionOptions; @@ -72,7 +71,8 @@ public void cleanStarting(CleanStartingEvent event) { this.options = event.getOptionsProvider().getOptions(WorkerOptions.class); workerFactory.setReporter(env.getReporter()); workerFactory.setOptions(options); - shutdownPool("Clean command is running, shutting down worker pool..."); + shutdownPool( + "Clean command is running, shutting down worker pool...", /* alwaysLog= */ false); } } @@ -179,31 +179,10 @@ public void registerSpawnStrategies( @Subscribe public void buildComplete(BuildCompleteEvent event) { if (options != null && options.workerQuitAfterBuild) { - shutdownPool("Build completed, shutting down worker pool..."); - } - } - - /** - * Stops any workers that are still executing. - * - *

This currently kills off some amount of workers, losing the warmed-up state. - * TODO(b/119701157): Cancel running workers instead (requires some way to reach each worker). - */ - @Subscribe - public void buildInterrupted(BuildInterruptedEvent event) { - if (workerPool != null) { - if ((options != null && options.workerVerbose)) { - env.getReporter().handle(Event.info("Build interrupted, stopping active workers...")); - } - workerPool.stopWork(); + shutdownPool("Build completed, shutting down worker pool...", /* alwaysLog= */ false); } } - /** Shuts down the worker pool and sets {#code workerPool} to null. */ - private void shutdownPool(String reason) { - shutdownPool(reason, /* alwaysLog= */ false); - } - /** Shuts down the worker pool and sets {#code workerPool} to null. */ private void shutdownPool(String reason, boolean alwaysLog) { Preconditions.checkArgument(!reason.isEmpty()); diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerMultiplexer.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerMultiplexer.java index 407a0b18c865e5..ab537e101becba 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerMultiplexer.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerMultiplexer.java @@ -314,6 +314,8 @@ private boolean sendRequest() { * *

This is only called on the readResponses subthread and so cannot be interrupted by dynamic * execution cancellation, but only by a call to {@link #destroyProcess()}. + * + * @return True if the worker is still in a consistent state. */ private boolean readResponse() { WorkResponse parsedResponse; diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java index abfa98c7ddf170..ba630a52578da9 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java @@ -189,28 +189,12 @@ private void waitForHighPriorityWorkersToFinish() throws InterruptedException { } } + /** + * Closes all the worker pools, destroying the workers in the process. This waits for any + * currently-ongoing work to finish. + */ public void close() { workerPools.values().forEach(GenericKeyedObjectPool::close); multiplexPools.values().forEach(GenericKeyedObjectPool::close); } - - /** Stops any ongoing work in the worker pools. This may entail killing the worker processes. */ - public void stopWork() { - workerPools - .values() - .forEach( - pool -> { - if (pool.getNumActive() > 0) { - pool.clear(); - } - }); - multiplexPools - .values() - .forEach( - pool -> { - if (pool.getNumActive() > 0) { - pool.clear(); - } - }); - } } diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java index 9528084e0c215c..eba2755b9a67a8 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java @@ -381,7 +381,6 @@ WorkResponse execInWorker( Worker worker = null; WorkResponse response; WorkRequest request; - ActionExecutionMetadata owner = spawn.getResourceOwner(); try { Stopwatch setupInputsStopwatch = Stopwatch.createStarted(); diff --git a/src/test/java/com/google/devtools/build/lib/worker/WorkerKeyTest.java b/src/test/java/com/google/devtools/build/lib/worker/WorkerKeyTest.java index 0a078b63a6fe16..e8d5aa6a0be816 100644 --- a/src/test/java/com/google/devtools/build/lib/worker/WorkerKeyTest.java +++ b/src/test/java/com/google/devtools/build/lib/worker/WorkerKeyTest.java @@ -23,7 +23,6 @@ import com.google.devtools.build.lib.actions.ExecutionRequirements.WorkerProtocolFormat; import com.google.devtools.build.lib.vfs.DigestHashFunction; import com.google.devtools.build.lib.vfs.FileSystem; -import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.inmemoryfs.InMemoryFileSystem; import org.junit.Test; import org.junit.runner.RunWith; @@ -34,40 +33,53 @@ public class WorkerKeyTest { final FileSystem fs = new InMemoryFileSystem(DigestHashFunction.SHA256); - Path workerBaseDir = fs.getPath("/outputbase/bazel-workers"); - WorkerKey workerKey = - new WorkerKey( - /* args= */ ImmutableList.of("arg1", "arg2", "arg3"), - /* env= */ ImmutableMap.of("env1", "foo", "env2", "bar"), - /* execRoot= */ fs.getPath("/outputbase/execroot/workspace"), - /* mnemonic= */ "dummy", - /* workerFilesCombinedHash= */ HashCode.fromInt(0), - /* workerFilesWithHashes= */ ImmutableSortedMap.of(), - /* mustBeSandboxed= */ true, - /* proxied= */ true, - WorkerProtocolFormat.PROTO); + private WorkerKey makeWorkerKey(boolean multiplex, boolean dynamic) { + return new WorkerKey( + /* args= */ ImmutableList.of("arg1", "arg2", "arg3"), + /* env= */ ImmutableMap.of("env1", "foo", "env2", "bar"), + /* execRoot= */ fs.getPath("/outputbase/execroot/workspace"), + /* mnemonic= */ "dummy", + /* workerFilesCombinedHash= */ HashCode.fromInt(0), + /* workerFilesWithHashes= */ ImmutableSortedMap.of(), + /* isSpeculative= */ dynamic, + /* proxied= */ multiplex, + WorkerProtocolFormat.PROTO); + } @Test public void testWorkerKeyGetter() { - assertThat(workerKey.mustBeSandboxed()).isTrue(); - assertThat(workerKey.getProxied()).isTrue(); - assertThat(workerKey.isMultiplex()).isFalse(); - assertThat(workerKey.getWorkerTypeName()).isEqualTo("worker"); - assertThat(WorkerKey.makeWorkerTypeName(/* proxied=*/ false, /* mustBeSandboxed=*/ false)) - .isEqualTo("worker"); - assertThat(WorkerKey.makeWorkerTypeName(/* proxied=*/ false, /* mustBeSandboxed=*/ true)) - .isEqualTo("worker"); - assertThat(WorkerKey.makeWorkerTypeName(/* proxied=*/ true, /* mustBeSandboxed=*/ false)) - .isEqualTo("multiplex-worker"); - assertThat(WorkerKey.makeWorkerTypeName(/* proxied=*/ true, /* mustBeSandboxed=*/ true)) - .isEqualTo("worker"); + WorkerKey keyNomultiNodynamic = makeWorkerKey(false, false); + assertThat(keyNomultiNodynamic.isSpeculative()).isFalse(); + assertThat(keyNomultiNodynamic.getProxied()).isFalse(); + assertThat(keyNomultiNodynamic.isMultiplex()).isFalse(); + assertThat(keyNomultiNodynamic.getWorkerTypeName()).isEqualTo("worker"); + + WorkerKey keyMultiNoDynamic = makeWorkerKey(true, false); + assertThat(keyMultiNoDynamic.isSpeculative()).isFalse(); + assertThat(keyMultiNoDynamic.getProxied()).isTrue(); + assertThat(keyMultiNoDynamic.isMultiplex()).isTrue(); + assertThat(keyMultiNoDynamic.getWorkerTypeName()).isEqualTo("multiplex-worker"); + + WorkerKey keyNoMultiDynamic = makeWorkerKey(false, true); + assertThat(keyNoMultiDynamic.isSpeculative()).isTrue(); + assertThat(keyNoMultiDynamic.getProxied()).isFalse(); + assertThat(keyNoMultiDynamic.isMultiplex()).isFalse(); + assertThat(keyNoMultiDynamic.getWorkerTypeName()).isEqualTo("worker"); + + WorkerKey keyMultiDynamic = makeWorkerKey(true, true); + assertThat(keyMultiDynamic.isSpeculative()).isTrue(); + assertThat(keyMultiDynamic.getProxied()).isTrue(); + assertThat(keyMultiDynamic.isMultiplex()).isFalse(); + assertThat(keyMultiDynamic.getWorkerTypeName()).isEqualTo("worker"); + // Hash code contains args, env, execRoot, proxied, and mnemonic. - assertThat(workerKey.hashCode()).isEqualTo(1605714200); - assertThat(workerKey.getProtocolFormat()).isEqualTo(WorkerProtocolFormat.PROTO); + assertThat(keyMultiDynamic.hashCode()).isEqualTo(1605714200); + assertThat(keyMultiDynamic.getProtocolFormat()).isEqualTo(WorkerProtocolFormat.PROTO); } @Test public void testWorkerKeyEquality() { + WorkerKey workerKey = makeWorkerKey(true, true); WorkerKey workerKeyWithSameFields = new WorkerKey( workerKey.getArgs(), @@ -76,7 +88,7 @@ public void testWorkerKeyEquality() { workerKey.getMnemonic(), workerKey.getWorkerFilesCombinedHash(), workerKey.getWorkerFilesWithHashes(), - workerKey.mustBeSandboxed(), + workerKey.isSpeculative(), workerKey.getProxied(), workerKey.getProtocolFormat()); assertThat(workerKey).isEqualTo(workerKeyWithSameFields); @@ -84,6 +96,7 @@ public void testWorkerKeyEquality() { @Test public void testWorkerKeyInequality_protocol() { + WorkerKey workerKey = makeWorkerKey(true, true); WorkerKey workerKeyWithDifferentProtocol = new WorkerKey( workerKey.getArgs(), @@ -92,7 +105,7 @@ public void testWorkerKeyInequality_protocol() { workerKey.getMnemonic(), workerKey.getWorkerFilesCombinedHash(), workerKey.getWorkerFilesWithHashes(), - workerKey.mustBeSandboxed(), + workerKey.isSpeculative(), workerKey.getProxied(), WorkerProtocolFormat.JSON); assertThat(workerKey).isNotEqualTo(workerKeyWithDifferentProtocol); diff --git a/src/test/java/com/google/devtools/build/lib/worker/WorkerPoolTest.java b/src/test/java/com/google/devtools/build/lib/worker/WorkerPoolTest.java index d1f426d8638169..036ca7b11be835 100644 --- a/src/test/java/com/google/devtools/build/lib/worker/WorkerPoolTest.java +++ b/src/test/java/com/google/devtools/build/lib/worker/WorkerPoolTest.java @@ -238,50 +238,4 @@ public void testBorrow_twoHiPrioBlocks() throws Exception { verify(factoryMock, times(2)).makeObject(workerKey1); verify(factoryMock, times(1)).makeObject(workerKey2); } - - @Test - public void testStopWork_activePoolsStopped() throws Exception { - WorkerPool pool = - new WorkerPool( - factoryMock, - // Have to declare the mnemonics, or they all fall into the default SimpleWorkerPool. - ImmutableMap.of("mnem1", 2, "mnem2", 2), - ImmutableMap.of("mnem2", 2, "mnem3", 2), - Lists.newArrayList()); - WorkerKey singleKey1 = createWorkerKey(fileSystem, "mnem1", false); - // These workers get borrowed, then both get destroyed in stopWork because they share mnemonic - WorkerKey singleKey1a = createWorkerKey(fileSystem, "mnem1", false, "anArg"); - pool.borrowObject(singleKey1); - Worker worker1a = pool.borrowObject(singleKey1a); - pool.returnObject(singleKey1a, worker1a); - WorkerKey singleKey2 = createWorkerKey(fileSystem, "mnem2", false); - // This worker gets borrowed, then returned, doesn't get destroyed in stopWork - Worker worker1 = pool.borrowObject(singleKey2); - pool.returnObject(singleKey2, worker1); - WorkerKey multiKey1 = createWorkerKey(fileSystem, "mnem2", true); - // This worker gets borrowed, then destroyed in stopWork, but separately from the singleplex - // worker2 even though they share a mnemonic. - pool.borrowObject(multiKey1); - WorkerKey multiKey2 = createWorkerKey(fileSystem, "mnem3", true); - // This worker gets borrowed, then returned, doesn't get destroyed during stopWork. - Worker worker2 = pool.borrowObject(multiKey2); - pool.returnObject(multiKey2, worker2); - verify(factoryMock, times(1)).makeObject(singleKey1); - verify(factoryMock, times(1)).makeObject(singleKey1a); - verify(factoryMock, times(1)).makeObject(singleKey2); - verify(factoryMock, times(1)).makeObject(multiKey1); - verify(factoryMock, times(1)).makeObject(multiKey2); - pool.stopWork(); - pool.borrowObject(singleKey1); - pool.borrowObject(singleKey1a); - pool.borrowObject(singleKey2); - pool.borrowObject(multiKey1); - pool.borrowObject(multiKey2); - // After stopWork, we had to create new workers for the keys that got their pools destroyed. - verify(factoryMock, times(2)).makeObject(singleKey1); - verify(factoryMock, times(2)).makeObject(singleKey1a); - verify(factoryMock, times(1)).makeObject(singleKey2); - verify(factoryMock, times(2)).makeObject(multiKey1); - verify(factoryMock, times(1)).makeObject(multiKey2); - } }