ctexplain: first functional check-in

#11511 set up basic project structure. This PR adds minimum working functionality. Specifically, you can run it with a build command and it reports basic stats on the build's graph.

Example:

```
$ bazel-bin/tools/ctexplain/ctexplain -b "//testapp:foo"
Collecting configured targets for //testapp:foo... done in 0.62 s.
Configurations: 3
Targets: 79
Configured targets: 92 (+16.5% vs. targets)
Targets with multiple configs: 13
```

Notes:

* Changed import structure to prefer module imports over function, class imports (style guide recommendation)
* Set up structure for injecting arbitrary analyses. Each analysis consumes the build's set of configured targets and can output whatever it wants.
* Implemented one basic analysis
* Structured code to make it easy to fork output formatters (e.g. for machine-readable output). But tried not to add speculative inheritance / boilerplate too soon

Context: [Measuring Configuration Overhead](https://docs.google.com/document/d/10ZxO2wZdKJATnYBqAm22xT1k5r4Vp6QX96TkqSUIhs0/edit).

Work towards #10613

Closes #11829.

PiperOrigin-RevId: 328325094
bazelbuild · Aug 25, 2020 · c1d7087 · c1d7087
1 parent 11f233f
commit c1d7087
Show file tree

Hide file tree

Showing 10 changed files with 573 additions and 35 deletions.
diff --git a/tools/ctexplain/BUILD b/tools/ctexplain/BUILD
@@ -6,11 +6,32 @@ package(default_visibility = ["//visibility:public"])
 
 licenses(["notice"])  # Apache 2.0
 
+filegroup(
+    name = "srcs",
+    srcs = glob(["**"]),
+)
+
 py_binary(
     name = "ctexplain",
     srcs = ["ctexplain.py"],
     python_version = "PY3",
-    deps = [":bazel_api"],
+    deps = [
+        ":analyses",
+        ":base",
+        ":bazel_api",
+        ":lib",
+        "//third_party/py/abseil",
+    ],
+)
+
+py_library(
+    name = "lib",
+    srcs = ["lib.py"],
+    srcs_version = "PY3ONLY",
+    deps = [
+        ":base",
+        ":bazel_api",
+    ],
 )
 
 py_library(
@@ -20,6 +41,38 @@ py_library(
     deps = [":base"],
 )
 
+py_library(
+    name = "analyses",
+    srcs = ["analyses/summary.py"],
+    srcs_version = "PY3ONLY",
+    deps = [":base"],
+)
+
+py_library(
+    name = "base",
+    srcs = [
+        "types.py",
+        "util.py",
+    ],
+    srcs_version = "PY3ONLY",
+    deps = [
+        "//third_party/py/dataclasses",  # Backport for Python < 3.7.
+        "//third_party/py/frozendict",
+    ],
+)
+
+py_test(
+    name = "lib_test",
+    size = "small",
+    srcs = ["lib_test.py"],
+    python_version = "PY3",
+    deps = [
+        ":bazel_api",
+        ":lib",
+        "//src/test/py/bazel:test_base",
+    ],
+)
+
 py_test(
     name = "bazel_api_test",
     size = "small",
@@ -31,15 +84,15 @@ py_test(
     ],
 )
 
-py_library(
-    name = "base",
-    srcs = [
-        "types.py",
-    ],
-    srcs_version = "PY3ONLY",
+py_test(
+    name = "analyses_test",
+    size = "small",
+    srcs = ["analyses/summary_test.py"],
+    main = "analyses/summary_test.py",  # TODO: generalize this.
+    python_version = "PY3",
     deps = [
-        "//third_party/py/dataclasses",  # Backport for Python < 3.7.
-        "//third_party/py/frozendict",
+        ":analyses",
+        ":base",
     ],
 )
 
@@ -53,8 +106,3 @@ py_test(
         "//third_party/py/frozendict",
     ],
 )
-
-filegroup(
-    name = "srcs",
-    srcs = glob(["*"]),
-)
diff --git a/tools/ctexplain/analyses/summary.py b/tools/ctexplain/analyses/summary.py
@@ -0,0 +1,72 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Analysis that summarizes basic graph info."""
+from typing import Tuple
+
+# Do not edit this line. Copybara replaces it with PY2 migration helper.
+from dataclasses import dataclass
+
+from tools.ctexplain.types import ConfiguredTarget
+# Do not edit this line. Copybara replaces it with PY2 migration helper..third_party.bazel.tools.ctexplain.util as util
+
+
+@dataclass(frozen=True)
+class _Summary:
+  """Immutable result of the summary analysis.
+
+  Instances are built positionally, so the field order below is part of the
+  constructor contract.
+  """
+  # How many distinct configurations appear in the configured target graph.
+  configurations: int
+  # How many distinct target labels the build contains.
+  targets: int
+  # Total count of configured targets in the build.
+  configured_targets: int
+  # How many targets yield more than one configured target. This is not the
+  # same as configured_targets - targets: with targets=2 and
+  # configured_targets=4, both targets might be configured twice, or one
+  # target might be configured three times and the other once.
+  repeated_targets: int
+
+
+def analyze(cts: Tuple[ConfiguredTarget, ...]) -> _Summary:
+  """Computes summary statistics over a build's configured targets.
+
+  Args:
+    cts: the build's configured targets
+
+  Returns:
+    Counts of distinct configuration hashes, distinct labels, configured
+    targets, and labels that appear in more than one configured target.
+  """
+  config_hashes = {ct.config_hash for ct in cts}
+  # Tally how many configured targets share each label. The number of keys
+  # doubles as the number of distinct targets.
+  occurrences = {}
+  for ct in cts:
+    occurrences[ct.label] = occurrences.get(ct.label, 0) + 1
+  num_repeated = sum(1 for n in occurrences.values() if n > 1)
+
+  return _Summary(
+      configurations=len(config_hashes),
+      targets=len(occurrences),
+      configured_targets=len(cts),
+      repeated_targets=num_repeated)
+
+
+def report(result: _Summary) -> None:
+  """Prints the analysis results to stdout in human-readable form.
+
+  We intentionally make this its own function to make it easy to support other
+  output formats (like machine-readable) if we ever want to do that.
+
+  Args:
+    result: the analysis result
+  """
+  # The module-level import of util did not survive the Copybara export (only
+  # a mangled comment is left at the top of this file), so bind it here;
+  # otherwise the call below raises NameError.
+  import tools.ctexplain.util as util  # pylint: disable=g-import-not-at-top
+  # Formatted difference between the target and configured target counts.
+  ct_surplus = util.percent_diff(result.targets, result.configured_targets)
+  print(f"""
+Configurations: {result.configurations}
+Targets: {result.targets}
+Configured targets: {result.configured_targets} ({ct_surplus} vs. targets)
+Targets with multiple configs: {result.repeated_targets}
+""")
diff --git a/tools/ctexplain/analyses/summary_test.py b/tools/ctexplain/analyses/summary_test.py
@@ -0,0 +1,46 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for summary.py."""
+import unittest
+
+# Do not edit this line. Copybara replaces it with PY2 migration helper.
+from frozendict import frozendict
+
+# Do not edit this line. Copybara replaces it with PY2 migration helper..third_party.bazel.tools.ctexplain.analyses.summary as summary
+from tools.ctexplain.types import Configuration
+from tools.ctexplain.types import ConfiguredTarget
+from tools.ctexplain.types import NullConfiguration
+
+
+class SummaryTest(unittest.TestCase):
+  """Unit tests for the summary analysis."""
+
+  def testAnalysis(self):
+    """Checks the counts for one label in three configs plus a second label."""
+    # The module-level import of summary did not survive the Copybara export
+    # (only a mangled comment is left at the top of this file), so bind it
+    # here; otherwise summary.analyze below raises NameError.
+    import tools.ctexplain.analyses.summary as summary  # pylint: disable=g-import-not-at-top
+
+    config1 = Configuration(None, frozendict({'a': frozendict({'b': 'c'})}))
+    config2 = Configuration(None, frozendict({'d': frozendict({'e': 'f'})}))
+
+    # //foo appears under three configurations; //bar reuses config1.
+    ct1 = ConfiguredTarget('//foo', config1, 'hash1', None)
+    ct2 = ConfiguredTarget('//foo', config2, 'hash2', None)
+    ct3 = ConfiguredTarget('//foo', NullConfiguration(), 'null', None)
+    ct4 = ConfiguredTarget('//bar', config1, 'hash1', None)
+
+    res = summary.analyze((ct1, ct2, ct3, ct4))
+    self.assertEqual(3, res.configurations)
+    self.assertEqual(2, res.targets)
+    self.assertEqual(4, res.configured_targets)
+    self.assertEqual(1, res.repeated_targets)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tools/ctexplain/bazel_api.py b/tools/ctexplain/bazel_api.py
@@ -43,11 +43,11 @@ def run_bazel_in_client(args: List[str]) -> Tuple[int, List[str], List[str]]:
     Tuple of (return code, stdout, stderr)
   """
   result = subprocess.run(
-      ["bazel"] + args,
+      ["blaze"] + args,
       cwd=os.getcwd(),
       stdout=subprocess.PIPE,
       stderr=subprocess.PIPE,
-      check=True)
+      check=False)
   return (result.returncode, result.stdout.decode("utf-8").split(os.linesep),
           result.stderr)
 
@@ -73,17 +73,23 @@ def cquery(self,
       stderr contains the query's stderr (regardless of success value), and cts
       is the configured targets found by the query if successful, empty
       otherwise.
+
+      ct order preserves cquery's output order. This is topologically sorted
+      with duplicates removed. So no unique configured target appears twice and
+      if A depends on B, A appears before B.
     """
     base_args = ["cquery", "--show_config_fragments=transitive"]
     (returncode, stdout, stderr) = self.run_bazel(base_args + args)
     if returncode != 0:
       return (False, stderr, ())
 
-    cts = set()
+    cts = []
     for line in stdout:
+      if not line.strip():
+        continue
       ctinfo = _parse_cquery_result_line(line)
       if ctinfo is not None:
-        cts.add(ctinfo)
+        cts.append(ctinfo)
 
     return (True, stderr, tuple(cts))
 
@@ -97,7 +103,7 @@ def get_config(self, config_hash: str) -> Configuration:
       The matching configuration or None if no match is found.
 
     Raises:
-      ValueError on any parsing problems.
+      ValueError: On any parsing problems.
     """
     if config_hash == "HOST":
       return HostConfiguration()
@@ -109,11 +115,13 @@ def get_config(self, config_hash: str) -> Configuration:
     if returncode != 0:
       raise ValueError("Could not get config: " + stderr)
     config_json = json.loads(os.linesep.join(stdout))
-    fragments = [
-        fragment["name"].split(".")[-1] for fragment in config_json["fragments"]
-    ]
+    fragments = frozendict({
+        _base_name(entry["name"]):
+        tuple(_base_name(clazz) for clazz in entry["fragmentOptions"])
+        for entry in config_json["fragments"]
+    })
     options = frozendict({
-        entry["name"].split(".")[-1]: frozendict(entry["options"])
+        _base_name(entry["name"]): frozendict(entry["options"])
         for entry in config_json["fragmentOptions"]
     })
     return Configuration(fragments, options)
@@ -156,3 +164,19 @@ def _parse_cquery_result_line(line: str) -> ConfiguredTarget:
       config=None,  # Not yet available: we'll need `bazel config` to get this.
       config_hash=config_hash,
       transitive_fragments=fragments)
+
+
+def _base_name(full_name: str) -> str:
+  """Drops the package prefix from a fully qualified Java class name.
+
+  Everything up to and including the last "." is removed, so a nested-class
+  suffix written with "$" is kept.
+
+  Examples:
+    - "A.B.OuterClass" -> "OuterClass"
+    - "A.B.OuterClass$InnerClass" -> "OuterClass$InnerClass"
+
+  Args:
+    full_name: Fully qualified class name.
+
+  Returns:
+    The text after the last "."; the whole input if it contains no ".".
+  """
+  _, _, simple_name = full_name.rpartition(".")
+  return simple_name
diff --git a/tools/ctexplain/bazel_api_test.py b/tools/ctexplain/bazel_api_test.py
@@ -77,7 +77,7 @@ def testGetTargetConfig(self):
     config = self._bazel_api.get_config(cts[0].config_hash)
     expected_fragments = ['PlatformConfiguration', 'JavaConfiguration']
     for exp in expected_fragments:
-      self.assertIn(exp, config.fragments)
+      self.assertIn(exp, config.fragments.keys())
     core_options = config.options['CoreOptions']
     self.assertIsNotNone(core_options)
     self.assertIn(('stamp', 'false'), core_options.items())
@@ -111,6 +111,16 @@ def testGetNullConfig(self):
     self.assertEqual(len(config.fragments), 0)
     self.assertEqual(len(config.options), 0)
 
+  def testConfigFragmentsMap(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cts = self._bazel_api.cquery(['//testapp:fg'])[2]
+    fragments_map = self._bazel_api.get_config(cts[0].config_hash).fragments
+    self.assertIn('PlatformOptions', fragments_map['PlatformConfiguration'])
+    self.assertIn('ShellConfiguration$Options',
+                  fragments_map['ShellConfiguration'])
+
   def testConfigWithDefines(self):
     self.ScratchFile('testapp/BUILD', [
         'filegroup(name = "fg", srcs = ["a.file"])',