diff --git a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java
index 913a3234b1d982..869f6d2b42f43f 100644
--- a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java
+++ b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java
@@ -261,16 +261,14 @@ public ListenableFuture<Void> prefetchInputs()
               (BulkTransferException e) -> {
                 if (BulkTransferException.allCausedByCacheNotFoundException(e)) {
                   var code =
-                      executionOptions.useNewExitCodeForLostInputs
+                      (executionOptions.useNewExitCodeForLostInputs
+                              || executionOptions.remoteRetryOnCacheEviction > 0)
                           ? Code.REMOTE_CACHE_EVICTED
                           : Code.REMOTE_CACHE_FAILED;
                   throw new EnvironmentalExecException(
                       e,
                       FailureDetail.newBuilder()
-                          .setMessage(
-                              "Failed to fetch blobs because they do not exist remotely."
-                                  + " Build without the Bytes does not work if your remote"
-                                  + " cache evicts blobs during builds.")
+                          .setMessage("Failed to fetch blobs because they do not exist remotely.")
                           .setSpawn(FailureDetails.Spawn.newBuilder().setCode(code))
                           .build());
                 } else {
diff --git a/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java b/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java
index e439bce3cc2006..d2164b292d457c 100644
--- a/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java
+++ b/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java
@@ -507,6 +507,17 @@ public boolean usingLocalTestJobs() {
               + " blobs during the build.")
   public boolean useNewExitCodeForLostInputs;
 
+  @Option(
+      name = "experimental_remote_cache_eviction_retries",
+      defaultValue = "0",
+      documentationCategory = OptionDocumentationCategory.REMOTE,
+      effectTags = {OptionEffectTag.EXECUTION},
+      help =
+          "The maximum number of attempts to retry if the build encountered remote cache eviction"
+              + " error. A non-zero value will implicitly set"
+              + " --incompatible_remote_use_new_exit_code_for_lost_inputs to true.")
+  public int remoteRetryOnCacheEviction;
+
   /** An enum for specifying different formats of test output. */
   public enum TestOutputFormat {
     SUMMARY, // Provide summary output only.
diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
index c01255feb42fa6..7a42a266b7550f 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
@@ -556,7 +556,8 @@ private SpawnResult handleError(
       catastrophe = true;
     } else if (remoteCacheFailed) {
       status = Status.REMOTE_CACHE_FAILED;
-      if (executionOptions.useNewExitCodeForLostInputs) {
+      if (executionOptions.useNewExitCodeForLostInputs
+          || executionOptions.remoteRetryOnCacheEviction > 0) {
         detailedCode = FailureDetails.Spawn.Code.REMOTE_CACHE_EVICTED;
       } else {
         detailedCode = FailureDetails.Spawn.Code.REMOTE_CACHE_FAILED;
diff --git a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java
index 0fbd5a2d3f0327..88c91ad71fb7c4 100644
--- a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java
+++ b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java
@@ -54,6 +54,7 @@
 import com.google.devtools.build.lib.util.AnsiStrippingOutputStream;
 import com.google.devtools.build.lib.util.DebugLoggerConfigurator;
 import com.google.devtools.build.lib.util.DetailedExitCode;
+import com.google.devtools.build.lib.util.ExitCode;
 import com.google.devtools.build.lib.util.InterruptedFailureDetails;
 import com.google.devtools.build.lib.util.LoggingUtil;
 import com.google.devtools.build.lib.util.Pair;
@@ -148,6 +149,82 @@ public BlazeCommandResult exec(
       Optional<List<Pair<String, String>>> startupOptionsTaggedWithBazelRc,
       List<Any> commandExtensions)
       throws InterruptedException {
+    // The retry budget is read from the raw arguments; every run that ends with
+    // REMOTE_CACHE_EVICTED consumes one retry and re-executes the whole command.
+    var remoteCacheEvictionRetries = getRemoteCacheEvictionRetries(args, outErr);
+    while (true) {
+      var result =
+          execOnce(
+              invocationPolicy,
+              args,
+              outErr,
+              lockingMode,
+              clientDescription,
+              firstContactTimeMillis,
+              startupOptionsTaggedWithBazelRc,
+              commandExtensions);
+      if (result.getExitCode() == ExitCode.REMOTE_CACHE_EVICTED && remoteCacheEvictionRetries > 0) {
+        --remoteCacheEvictionRetries;
+        outErr.printErrLn("Found remote cache eviction error, retrying the build...");
+        continue;
+      }
+      return result;
+    }
+  }
+
+  /**
+   * Extracts the value of {@code --experimental_remote_cache_eviction_retries} from the raw
+   * command line.
+   *
+   * <p>Both {@code --flag=value} and {@code --flag value} are recognized. As with regular option
+   * parsing, the last occurrence wins, and nothing after a bare {@code --} is treated as a flag.
+   *
+   * @param args the unparsed command-line arguments
+   * @param outErr where a malformed value is reported
+   * @return the requested retry count, or 0 if the flag is absent or its value is not an integer
+   */
+  private int getRemoteCacheEvictionRetries(List<String> args, OutErr outErr) {
+    // Since flags are not parsed yet at this point, we manually extract value of the retry flag.
+    var retryFlag = "--experimental_remote_cache_eviction_retries";
+    var retryFlagPrefix = retryFlag + "=";
+    String retryValue = null;
+    for (int i = 0; i < args.size(); i++) {
+      var arg = args.get(i);
+      if (arg.equals("--")) {
+        // Everything after a bare "--" is positional (e.g. program arguments of "bazel run").
+        break;
+      }
+      if (arg.startsWith(retryFlagPrefix)) {
+        retryValue = arg.substring(retryFlagPrefix.length());
+      } else if (arg.equals(retryFlag) && i + 1 < args.size()) {
+        // Space-separated form: the value is the next argument, which must not be rescanned.
+        retryValue = args.get(++i);
+      }
+    }
+    if (retryValue == null) {
+      return 0;
+    }
+    try {
+      return Integer.parseInt(retryValue);
+    } catch (NumberFormatException e) {
+      outErr.printErrLn(
+          String.format(
+              "Failed to parse retry times: %s, remote cache eviction retry is disabled", e));
+      return 0;
+    }
+  }
+
+  /** Runs the command once; {@link #exec} calls this from its cache eviction retry loop. */
+  public BlazeCommandResult execOnce(
+      InvocationPolicy invocationPolicy,
+      List<String> args,
+      OutErr outErr,
+      LockingMode lockingMode,
+      String clientDescription,
+      long firstContactTimeMillis,
+      Optional<List<Pair<String, String>>> startupOptionsTaggedWithBazelRc,
+      List<Any> commandExtensions)
+      throws InterruptedException {
     OriginalUnstructuredCommandLineEvent originalCommandLine =
         new OriginalUnstructuredCommandLineEvent(args);
     Preconditions.checkNotNull(clientDescription);
diff --git a/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java b/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java
index c27756888f02ab..7f8429707f088f 100644
--- a/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java
+++ b/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java
@@ -474,9 +474,7 @@ public void remoteCacheEvictBlobs_whenPrefetchingInput_exitWithCode39() throws E
     // Assert: Exit code is 39
     assertThat(error)
         .hasMessageThat()
-        .contains(
-            "Build without the Bytes does not work if your remote cache evicts blobs"
-                + " during builds");
+        .contains("Failed to fetch blobs because they do not exist remotely");
     assertThat(error).hasMessageThat().contains(String.format("%s/%s", hashCode, bytes.length));
     assertThat(error.getDetailedExitCode().getExitCode().getNumericExitCode()).isEqualTo(39);
   }
diff --git a/src/test/shell/bazel/remote/build_without_the_bytes_test.sh b/src/test/shell/bazel/remote/build_without_the_bytes_test.sh
index 21e015547dbff6..cbc9f6064eae13 100755
--- a/src/test/shell/bazel/remote/build_without_the_bytes_test.sh
+++ b/src/test/shell/bazel/remote/build_without_the_bytes_test.sh
@@ -1685,4 +1685,62 @@ end_of_record"
   expect_log "$expected_result"
 }
 
+function test_remote_cache_eviction_when_prefetching_input() {
+  mkdir -p a
+
+  cat > a/BUILD <<'EOF'
+genrule(
+  name = 'foo',
+  srcs = ['foo.in'],
+  outs = ['foo.out'],
+  cmd = 'cat $(SRCS) > $@',
+)
+
+genrule(
+  name = 'bar',
+  srcs = ['foo.out', 'bar.in'],
+  outs = ['bar.out'],
+  cmd = 'cat $(SRCS) > $@',
+  tags = ['no-remote-exec'],
+)
+EOF
+
+  echo foo > a/foo.in
+  echo bar > a/bar.in
+
+  # Populate remote cache
+  bazel build \
+    --remote_executor=grpc://localhost:${worker_port} \
+    --remote_download_minimal \
+    //a:bar >& $TEST_log || fail "Failed to build"
+
+  bazel clean
+
+  # Clean build, foo.out isn't downloaded
+  bazel build \
+    --remote_executor=grpc://localhost:${worker_port} \
+    --remote_download_minimal \
+    //a:bar >& $TEST_log || fail "Failed to build"
+
+  (! [[ -f bazel-bin/a/foo.out ]]) \
+    || fail "Expected intermediate output bazel-bin/a/foo.out to not be downloaded"
+
+  # Evict blobs from remote cache
+  stop_worker
+  start_worker
+
+  echo "updated bar" > a/bar.in
+
+  # Incremental build triggers remote cache eviction error but Bazel
+  # automatically retries the build and reruns the generating actions for
+  # missing blobs
+  bazel build \
+    --remote_executor=grpc://localhost:${worker_port} \
+    --remote_download_minimal \
+    --experimental_remote_cache_eviction_retries=5 \
+    //a:bar >& $TEST_log || fail "Failed to build"
+
+  expect_log "Found remote cache eviction error, retrying the build..."
+}
+
 run_suite "Build without the Bytes tests"