Skip to content

Commit

Permalink
Remote: Report checking cache status before the action is scheduled t…
Browse files Browse the repository at this point in the history
…o run remotely.

Add a Caching state to ActionState. This state indicates that the action is checking the cache, and it should occur before the Scheduling state.

Change ProgressStatus from enum to interface so that we can pass more data to the event handler (which is required to report upload/download details later).

Fixes bazelbuild#13531.
  • Loading branch information
coeuvre committed Jun 4, 2021
1 parent 3bb4493 commit 92fcd97
Show file tree
Hide file tree
Showing 19 changed files with 270 additions and 83 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package com.google.devtools.build.lib.actions;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.devtools.build.lib.events.ExtendedEventHandler.ProgressLike;

/** Event posted to signal that an in-flight action is checking the cache. */
public class CachingActionEvent implements ProgressLike {

  private final ActionExecutionMetadata action;
  private final String strategy;

  /**
   * Creates the event.
   *
   * @param action metadata of the action that is checking the cache
   * @param strategy name of the strategy doing the check; must not be null
   * @throws NullPointerException if {@code strategy} is null
   */
  public CachingActionEvent(ActionExecutionMetadata action, String strategy) {
    checkNotNull(strategy, "Strategy names are not optional");
    this.action = action;
    this.strategy = strategy;
  }

  /** Returns the metadata of the action this event refers to. */
  public ActionExecutionMetadata getActionMetadata() {
    return this.action;
  }

  /** Returns the name of the strategy on which the action is caching. */
  public String getStrategy() {
    return this.strategy;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@
import com.google.devtools.build.lib.actions.LostInputsActionExecutionException;
import com.google.devtools.build.lib.actions.LostInputsExecException;
import com.google.devtools.build.lib.actions.MetadataProvider;
import com.google.devtools.build.lib.actions.RunningActionEvent;
import com.google.devtools.build.lib.actions.SandboxedSpawnStrategy;
import com.google.devtools.build.lib.actions.SchedulingActionEvent;
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.SpawnExecutedEvent;
import com.google.devtools.build.lib.actions.SpawnResult;
Expand Down Expand Up @@ -318,7 +316,7 @@ public SortedMap<PathFragment, ActionInput> getInputMapping(PathFragment baseDir
}

@Override
public void report(ProgressStatus state, String name) {
public void report(ProgressStatus progress) {
ActionExecutionMetadata action = spawn.getResourceOwner();
if (action.getOwner() == null) {
return;
Expand All @@ -332,17 +330,7 @@ public void report(ProgressStatus state, String name) {

// TODO(ulfjack): We should report more details to the UI.
ExtendedEventHandler eventHandler = actionExecutionContext.getEventHandler();
switch (state) {
case EXECUTING:
case CHECKING_CACHE:
eventHandler.post(new RunningActionEvent(action, name));
break;
case SCHEDULING:
eventHandler.post(new SchedulingActionEvent(action, name));
break;
default:
break;
}
progress.postTo(eventHandler, action);
}

@Override
Expand Down
9 changes: 8 additions & 1 deletion src/main/java/com/google/devtools/build/lib/exec/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,21 @@ java_library(

java_library(
name = "spawn_runner",
srcs = ["SpawnRunner.java"],
srcs = [
"SpawnCheckingCache.java",
"SpawnExecuting.java",
"SpawnRunner.java",
"SpawnScheduling.java",
],
deps = [
":tree_deleter",
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/util/io",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//third_party:auto_value",
"//third_party:jsr305",
],
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package com.google.devtools.build.lib.exec;

import com.google.auto.value.AutoValue;
import com.google.devtools.build.lib.actions.ActionExecutionMetadata;
import com.google.devtools.build.lib.actions.CachingActionEvent;
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.exec.SpawnRunner.ProgressStatus;

/** Progress status reported while the {@link SpawnRunner} is looking for a cache hit. */
@AutoValue
public abstract class SpawnCheckingCache implements ProgressStatus {
  /** Returns the name carried by this status; it is passed on to the posted event. */
  public abstract String name();

  /** Creates a status carrying the given {@code name}. */
  public static SpawnCheckingCache create(String name) {
    return new AutoValue_SpawnCheckingCache(name);
  }

  /** Posts a {@link CachingActionEvent} for {@code action} to {@code eventHandler}. */
  @Override
  public void postTo(ExtendedEventHandler eventHandler, ActionExecutionMetadata action) {
    CachingActionEvent event = new CachingActionEvent(action, name());
    eventHandler.post(event);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package com.google.devtools.build.lib.exec;

import com.google.auto.value.AutoValue;
import com.google.devtools.build.lib.actions.ActionExecutionMetadata;
import com.google.devtools.build.lib.actions.RunningActionEvent;
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.exec.SpawnRunner.ProgressStatus;

/**
 * Progress status meaning that resources are acquired and there was probably no cache hit. It
 * MUST be posted before attempting to execute the subprocess.
 *
 * <p>Caching {@link SpawnRunner} implementations should post this only after a failed cache
 * lookup. They may still post it when cache lookup and execution happen within the same step,
 * e.g. as part of a single RPC call with no mechanism to report cache misses.
 */
@AutoValue
public abstract class SpawnExecuting implements ProgressStatus {
  /** Returns the name carried by this status; it is passed on to the posted event. */
  public abstract String name();

  /** Creates a status carrying the given {@code name}. */
  public static SpawnExecuting create(String name) {
    return new AutoValue_SpawnExecuting(name);
  }

  /** Posts a {@link RunningActionEvent} for {@code action} to {@code eventHandler}. */
  @Override
  public void postTo(ExtendedEventHandler eventHandler, ActionExecutionMetadata action) {
    RunningActionEvent event = new RunningActionEvent(action, name());
    eventHandler.post(event);
  }
}
26 changes: 6 additions & 20 deletions src/main/java/com/google/devtools/build/lib/exec/SpawnRunner.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package com.google.devtools.build.lib.exec;

import com.google.devtools.build.lib.actions.ActionContext;
import com.google.devtools.build.lib.actions.ActionExecutionMetadata;
import com.google.devtools.build.lib.actions.ActionInput;
import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander;
import com.google.devtools.build.lib.actions.ArtifactPathResolver;
Expand All @@ -25,6 +26,7 @@
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.SpawnResult;
import com.google.devtools.build.lib.actions.cache.MetadataInjector;
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.util.io.FileOutErr;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
Expand Down Expand Up @@ -104,25 +106,9 @@ public interface SpawnRunner {
* <p>{@link SpawnRunner} implementations should post a progress status before any potentially
* long-running operation.
*/
enum ProgressStatus {
/** Spawn is waiting for local or remote resources to become available. */
SCHEDULING,

/** The {@link SpawnRunner} is looking for a cache hit. */
CHECKING_CACHE,

/**
* Resources are acquired, and there was probably no cache hit. This MUST be posted before
* attempting to execute the subprocess.
*
* <p>Caching {@link SpawnRunner} implementations should only post this after a failed cache
* lookup, but may post this if cache lookup and execution happen within the same step, e.g. as
* part of a single RPC call with no mechanism to report cache misses.
*/
EXECUTING,

/** Downloading outputs from a remote machine. */
DOWNLOADING
interface ProgressStatus {
/** Post this progress event to the given {@link ExtendedEventHandler}. */
void postTo(ExtendedEventHandler eventHandler, ActionExecutionMetadata action);
}

/**
Expand Down Expand Up @@ -214,7 +200,7 @@ SortedMap<PathFragment, ActionInput> getInputMapping(PathFragment baseDirectory)
throws IOException, ForbiddenActionInputException;

/** Reports a progress update to the Spawn strategy. */
void report(ProgressStatus state, String name);
void report(ProgressStatus progress);

/**
* Returns a {@link MetadataInjector} that allows a caller to inject metadata about spawn
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package com.google.devtools.build.lib.exec;

import com.google.auto.value.AutoValue;
import com.google.devtools.build.lib.actions.ActionExecutionMetadata;
import com.google.devtools.build.lib.actions.SchedulingActionEvent;
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.exec.SpawnRunner.ProgressStatus;

/** Progress status meaning the spawn is waiting for local or remote resources to be available. */
@AutoValue
public abstract class SpawnScheduling implements ProgressStatus {
  /** Returns the name carried by this status; it is passed on to the posted event. */
  public abstract String name();

  /** Creates a status carrying the given {@code name}. */
  public static SpawnScheduling create(String name) {
    return new AutoValue_SpawnScheduling(name);
  }

  /** Posts a {@link SchedulingActionEvent} for {@code action} to {@code eventHandler}. */
  @Override
  public void postTo(ExtendedEventHandler eventHandler, ActionExecutionMetadata action) {
    SchedulingActionEvent event = new SchedulingActionEvent(action, name());
    eventHandler.post(event);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.exec.BinTools;
import com.google.devtools.build.lib.exec.RunfilesTreeUpdater;
import com.google.devtools.build.lib.exec.SpawnExecuting;
import com.google.devtools.build.lib.exec.SpawnScheduling;
import com.google.devtools.build.lib.exec.SpawnRunner;
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
Expand Down Expand Up @@ -131,10 +133,10 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
Profiler.instance()
.profile(ProfilerTask.LOCAL_EXECUTION, spawn.getResourceOwner().getMnemonic())) {
ActionExecutionMetadata owner = spawn.getResourceOwner();
context.report(ProgressStatus.SCHEDULING, getName());
context.report(SpawnScheduling.create(getName()));
try (ResourceHandle handle =
resourceManager.acquireResources(owner, spawn.getLocalResources())) {
context.report(ProgressStatus.EXECUTING, getName());
context.report(SpawnExecuting.create(getName()));
if (!localExecutionOptions.localLockfreeOutput) {
context.lockOutputFiles();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.events.Reporter;
import com.google.devtools.build.lib.exec.SpawnCache;
import com.google.devtools.build.lib.exec.SpawnRunner.ProgressStatus;
import com.google.devtools.build.lib.exec.SpawnCheckingCache;
import com.google.devtools.build.lib.exec.SpawnRunner.SpawnExecutionContext;
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
Expand Down Expand Up @@ -96,7 +96,7 @@ public CacheHandle lookup(Spawn spawn, SpawnExecutionContext context)
Profiler prof = Profiler.instance();
if (options.remoteAcceptCached
|| (options.incompatibleRemoteResultsIgnoreDisk && useDiskCache(options))) {
context.report(ProgressStatus.CHECKING_CACHE, "remote-cache");
context.report(SpawnCheckingCache.create("remote-cache"));
// Metadata will be available in context.current() until we detach.
// This is done via a thread-local variable.
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@
import com.google.devtools.build.lib.exec.AbstractSpawnStrategy;
import com.google.devtools.build.lib.exec.ExecutionOptions;
import com.google.devtools.build.lib.exec.RemoteLocalFallbackRegistry;
import com.google.devtools.build.lib.exec.SpawnCheckingCache;
import com.google.devtools.build.lib.exec.SpawnExecuting;
import com.google.devtools.build.lib.exec.SpawnScheduling;
import com.google.devtools.build.lib.exec.SpawnRunner;
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
Expand Down Expand Up @@ -143,7 +146,7 @@ public void onNext(Operation o) throws IOException {
}

public void reportExecuting() {
context.report(ProgressStatus.EXECUTING, getName());
context.report(SpawnExecuting.create(getName()));
reportedExecuting = true;
}

Expand All @@ -165,8 +168,6 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
boolean uploadLocalResults = remoteOptions.remoteUploadLocalResults && spawnCacheableRemotely;
boolean acceptCachedResult = remoteOptions.remoteAcceptCached && spawnCacheableRemotely;

context.report(ProgressStatus.SCHEDULING, getName());

RemoteAction action = remoteExecutionService.buildRemoteAction(spawn, context);
SpawnMetrics.Builder spawnMetrics =
SpawnMetrics.Builder.forRemoteExec()
Expand All @@ -179,6 +180,8 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)

Profiler prof = Profiler.instance();
try {
context.report(SpawnCheckingCache.create(getName()));

// Try to lookup the action in the action cache.
RemoteActionResult cachedResult;
try (SilentCloseable c = prof.profile(ProfilerTask.REMOTE_CACHE_CHECK, "check cache hit")) {
Expand Down Expand Up @@ -232,6 +235,8 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
.minus(action.getNetworkTime().getDuration().minus(networkTimeStart)));
}

context.report(SpawnScheduling.create(getName()));

ExecutingStatusReporter reporter = new ExecutingStatusReporter(context);
RemoteActionResult result;
try (SilentCloseable c = prof.profile(REMOTE_EXECUTION, "execute remotely")) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.google.devtools.build.lib.actions.ActionCompletionEvent;
import com.google.devtools.build.lib.actions.ActionScanningCompletedEvent;
import com.google.devtools.build.lib.actions.ActionStartedEvent;
import com.google.devtools.build.lib.actions.CachingActionEvent;
import com.google.devtools.build.lib.actions.RunningActionEvent;
import com.google.devtools.build.lib.actions.ScanningActionEvent;
import com.google.devtools.build.lib.actions.SchedulingActionEvent;
Expand Down Expand Up @@ -671,6 +672,13 @@ public void stopScanningAction(StoppedScanningActionEvent event) {
refresh();
}

/**
 * Handles a {@link CachingActionEvent}: passes the event to the state tracker via
 * {@code cachingAction} and then calls {@code refresh()}.
 */
@Subscribe
@AllowConcurrentEvents
public void checkingActionCache(CachingActionEvent event) {
stateTracker.cachingAction(event);
refresh();
}

@Subscribe
@AllowConcurrentEvents
public void schedulingAction(SchedulingActionEvent event) {
Expand Down
Loading

0 comments on commit 92fcd97

Please sign in to comment.