Adding logic to find and run failed actions in V2 logs. (buildfarm#6)

* Adding logic to find and run failed actions in V2 logs.
sgbalogh · Dec 5, 2018 · c33ca01 · c33ca01
1 parent efe95c3
commit c33ca01
Show file tree

Hide file tree

Showing 9 changed files with 723 additions and 54 deletions.
diff --git a/README.md b/README.md
@@ -62,10 +62,28 @@ human-readable way with the `printlog` command:
 
     $ bazel-bin/remote_client --grpc_log PATH_TO_LOG printlog
 
+## Getting a list of failed actions
+
+This is available to Remote API V2 functionality, which is present in Bazel 0.17
+and onward.
+
+The remote tool can analyse the grps log and print a list of digests of failed
+actions:
+
+    $ bazel-bin/remote_client --grpc_log PATH_TO_LOG failed_actions
+
+For the purpose of this command, a failed action is any action whose execute
+response returned a non-zero exit status or a non-zero status code (for example, DEADLINE_EXCEEDED).
+An action that has been successfully retried will not be included.
+Any action that did not obtain an execute response will not be included - this could happen, for
+example, when remote execution was unavailable.
+
+
 ### Running Actions in Docker
 
 This is available to Remote API V2 functionality, which is present in Bazel 0.17
-and onward:
+and onward.
+
 
 Given an Action in protobuf text format that provides a `container-image` platform, this tool can
 set up its inputs in a local directory and print a Docker command that will run this individual
@@ -127,6 +145,17 @@ Instead of using action digest, the above commands can use the parameter
 --textproto to specify a path to a text proto file containing a V1 action.
 This can be copy-pasted from an Execute call in the grpc log.
 
+For V2 API and later, you can skip specifying the action digest if `grpc_log` is
+specified. In this case, the tool will scan the log for failed actions. If a
+single failed action is found, it will use that action's digest. If multiple
+failed actions are found, it will print a list of options.
+
+    $ bazel-bin/remote_client \
+        --remote_cache=localhost:8080 \
+        --grpc_log=/tmp/grpclog
+        run
+
+
 ## Developer Information
 
 ### Third-party Dependencies

diff --git a/WORKSPACE b/WORKSPACE
@@ -32,6 +32,16 @@ new_http_archive(
     build_file = "BUILD.remoteapis",
 )
 
+# Bazel toolchains
+http_archive(
+  name = "bazel_toolchains",
+  urls = [
+    "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/bc0091adceaf4642192a8dcfc46e3ae3e4560ea7.tar.gz",
+    "https://github.com/bazelbuild/bazel-toolchains/archive/bc0091adceaf4642192a8dcfc46e3ae3e4560ea7.tar.gz",
+  ],
+  strip_prefix = "bazel-toolchains-bc0091adceaf4642192a8dcfc46e3ae3e4560ea7",
+  sha256 = "7e85a14821536bc24e04610d309002056f278113c6cc82f1059a609361812431",
+)
 load("//3rdparty:workspace.bzl", "maven_dependencies", "declare_maven")
 
 maven_dependencies(declare_maven)
diff --git a/src/main/java/com/google/devtools/build/remote/client/ActionGrouping.java b/src/main/java/com/google/devtools/build/remote/client/ActionGrouping.java
@@ -1,14 +1,20 @@
 package com.google.devtools.build.remote.client;
 
+import build.bazel.remote.execution.v2.Digest;
+import build.bazel.remote.execution.v2.ExecuteResponse;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Multiset;
 import com.google.common.collect.TreeMultiset;
 import com.google.devtools.build.lib.remote.logging.RemoteExecutionLog.LogEntry;
 import com.google.protobuf.util.Timestamps;
+import io.grpc.Status;
 import java.io.IOException;
 import java.io.PrintWriter;
-import java.util.HashMap;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 /** A class to handle GRPc log grouped by actions */
 final class ActionGrouping {
@@ -21,46 +27,198 @@ final class ActionGrouping {
 
   @VisibleForTesting static final String actionString = "Entries for action with hash '%s'\n";
 
-  // Key: actionId; Value: a set of associated log entries.
-  Map<String, Multiset<LogEntry>> actionMap = new HashMap<>();
-  int numSkipped = 0;
+  @VisibleForTesting
+  static class ActionDetails {
+    final Multiset<LogEntry> log;
+    final Digest digest;
+    final String actionId;
+    final ExecuteResponse executeResponse;
 
-  public void addLogEntry(LogEntry entry) {
-    if (!entry.hasMetadata()) {
-      numSkipped++;
-      return;
+    private ActionDetails(
+        Multiset<LogEntry> log, Digest digest, String actionId, ExecuteResponse executeResponse) {
+      this.log = log;
+      this.digest = digest;
+      this.actionId = actionId;
+      this.executeResponse = executeResponse;
     }
-    String hash = entry.getMetadata().getActionId();
-    if (!actionMap.containsKey(hash)) {
-      actionMap.put(
-          hash,
-          TreeMultiset.create(
-              (a, b) -> {
-                int i = Timestamps.compare(a.getStartTime(), b.getStartTime());
-                if (i != 0) {
-                  return i;
-                }
-                // In the improbable case of the same timestamp, ensure the messages do not
-                // override each other.
-                return a.hashCode() - b.hashCode();
-              }));
+
+    Digest getDigest() {
+      return digest;
     }
-    actionMap.get(hash).add(entry);
-  }
 
-  public void printByAction(PrintWriter out) throws IOException {
+    public ExecuteResponse getExecuteResponse() {
+      return executeResponse;
+    }
+
+    // We will consider an action to be failed if either:
+    //  - we successfully received the execution result but the status is non-zero
+    //  - we successfully received the action result but the exit code is non-zero
+    boolean isFailed() {
+      if (executeResponse == null) {
+        // Action was not successfully completed (either cancelled or RPC error)
+        // We don't know if it's a failing action.
+        return false;
+      }
+
+      if (executeResponse.hasStatus()
+          && executeResponse.getStatus().getCode() != Status.Code.OK.value()) {
+        // Errors such as PERMISSION_DENIED or DEADLINE_EXCEEDED
+        return true;
+      }
+
+      // Return true if the action was not successful
+      return executeResponse.hasResult() && executeResponse.getResult().getExitCode() != 0;
+    }
+
+    Iterable<? extends LogEntry> getSortedElements() {
+      return log;
+    }
+
+    public static class Builder {
+      Multiset<LogEntry> log;
+      Digest digest;
+      String actionId;
+      ExecuteResponse executeResponse;
+
+      public Builder(String actionId) {
+        log =
+            TreeMultiset.create(
+                (a, b) -> {
+                  int i = Timestamps.compare(a.getStartTime(), b.getStartTime());
+                  if (i != 0) {
+                    return i;
+                  }
+                  // In the improbable case of the same timestamp, ensure the messages do not
+                  // override each other.
+                  return a.hashCode() - b.hashCode();
+                });
+        this.actionId = actionId;
+      }
+
+      void add(LogEntry entry) throws IOException {
+        log.add(entry);
+
+        Digest d = LogParserUtils.extractDigest(entry);
+        if (d != null) {
+          if (digest != null && !d.equals(digest)) {
+            System.err.println("Warning: conflicting digests: " + d + " and " + digest);
+          }
+          if (digest != null && digest.getHash() != actionId) {
+            System.err.println(
+                "Warning: bad digest: " + d + " doesn't match action Id " + actionId);
+          }
+          digest = d;
+        }
+
+        List<ExecuteResponse> r = LogParserUtils.extractExecuteResponse(entry);
+        if (r.size() > 0) {
+          if(r.size() > 1) {
+            System.err.println(
+                "Warning: unexpected log format: multiple ExecutionResponse for action " + actionId
+                    + " in LogEntry " + entry);
+          }
+          if (executeResponse != null && executeResponse.hasResult()) {
+            System.err.println(
+                "Warning: unexpected log format: multiple action results for action " + actionId);
+          }
+          executeResponse = r.get(r.size() - 1);
+        }
+      }
+
+      ActionDetails build() {
+        return new ActionDetails(log, digest, actionId, executeResponse);
+      }
+    }
+  };
+
+  private final Map<String, ActionDetails> actionMap;
+
+  // True if found V1 entries in the log.
+  private boolean V1found = false;
+
+  private ActionGrouping(boolean V1found, Map<String, ActionDetails> actionMap) {
+    this.V1found = V1found;
+    this.actionMap = actionMap;
+  };
+
+  void printByAction(PrintWriter out) throws IOException {
     for (String hash : actionMap.keySet()) {
       out.println(actionDelimiter);
       out.printf(actionString, hash);
       out.println(actionDelimiter);
-      for (LogEntry entry : actionMap.get(hash)) {
+      for (LogEntry entry : actionMap.get(hash).getSortedElements()) {
         LogParserUtils.printLogEntry(entry, out);
         out.println(entryDelimiter);
       }
     }
-    if (numSkipped > 0) {
-      System.err.printf(
-          "WARNING: Skipped %d entrie(s) due to absence of request metadata.\n", numSkipped);
+  }
+
+  List<Digest> failedActions() throws IOException {
+    if (V1found) {
+      System.err.println(
+          "This functinality is not supported for V1 API. Please upgrade your Bazel version.");
+      System.exit(1);
+    }
+
+    ArrayList<Digest> result = new ArrayList<>();
+
+    for (String hash : actionMap.keySet()) {
+      ActionDetails a = actionMap.get(hash);
+      if (a.isFailed()) {
+        Digest digest = a.getDigest();
+        if (digest == null) {
+          System.err.println("Error: missing digest for failed action " + hash);
+        } else {
+          result.add(digest);
+        }
+      }
+    }
+
+    return result;
+  }
+
+  public static class Builder {
+    private Map<String, ActionDetails.Builder> actionMap = new LinkedHashMap<>();
+    // True if found V1 entries in the log.
+    private boolean V1found = false;
+
+    // The number of entries skipped
+    private int numSkipped = 0;
+
+    void addLogEntry(LogEntry entry) throws IOException {
+      if (entry.hasDetails() && LogParserUtils.isV1Entry(entry.getDetails())) {
+        V1found = true;
+      }
+
+      if (!entry.hasMetadata()) {
+        numSkipped++;
+        return;
+      }
+      String hash = entry.getMetadata().getActionId();
+
+      if (!actionMap.containsKey(hash)) {
+        actionMap.put(hash, new ActionDetails.Builder(hash));
+      }
+      actionMap.get(hash).add(entry);
+    }
+
+    public ActionGrouping build() {
+      if (numSkipped > 0) {
+        System.err.printf(
+            "WARNING: Skipped %d entrie(s) due to absence of request metadata.\n", numSkipped);
+      }
+      Map<String, ActionDetails> builtActionMap =
+          actionMap.entrySet().stream()
+              .collect(
+                  Collectors.toMap(
+                      Map.Entry::getKey,
+                      e -> e.getValue().build(),
+                      (u, v) -> {
+                        throw new IllegalStateException(String.format("Duplicate key %s", u));
+                      },
+                      LinkedHashMap::new));
+
+      return new ActionGrouping(V1found, builtActionMap);
     }
   }
 }