From c4461331d0a8515d5910bdd8215b9f54582ccc82 Mon Sep 17 00:00:00 2001
From: Daniel Goldman
Date: Thu, 17 Nov 2022 13:02:51 -0500
Subject: [PATCH] Add debug goals to python (#17057)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See #17039.

Given a testbed of
input:

```python
# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import json  # unownable, root level
import os.path  # unownable, not root level

import watchdog  # dependency not included
import yaml  # dependency included
import yamlpath  # owned by other resolve

try:
    import weakimport  # weakimport missing
except ImportError:
    ...

open("src/python/configs/prod.json")  # asset
open("testprojects/pants-plugins/src/python/test_pants_plugin/__init__.py")
```
we get
output:

```json
{
  "src/python/pants/backend/python/dependency_inference/t.py": {
    "imports": [
      {
        "name": "weakimport",
        "reference": {
          "lineno": 12,
          "weak": true
        },
        "resolved": {
          "status": "ImportOwnerStatus.weak_ignore",
          "address": []
        },
        "possible_resolve": null
      },
      {
        "name": "json",
        "reference": {
          "lineno": 4,
          "weak": false
        },
        "resolved": {
          "status": "ImportOwnerStatus.unownable",
          "address": []
        },
        "possible_resolve": null
      },
      {
        "name": "os.path",
        "reference": {
          "lineno": 5,
          "weak": false
        },
        "resolved": {
          "status": "ImportOwnerStatus.unownable",
          "address": []
        },
        "possible_resolve": null
      },
      {
        "name": "watchdog",
        "reference": {
          "lineno": 7,
          "weak": false
        },
        "resolved": {
          "status": "ImportOwnerStatus.unowned",
          "address": []
        },
        "possible_resolve": null
      },
      {
        "name": "yaml",
        "reference": {
          "lineno": 8,
          "weak": false
        },
        "resolved": {
          "status": "ImportOwnerStatus.unambiguous",
          "address": [
            "3rdparty/python#PyYAML",
            "3rdparty/python#types-PyYAML"
          ]
        },
        "possible_resolve": null
      },
      {
        "name": "yamlpath",
        "reference": {
          "lineno": 9,
          "weak": false
        },
        "resolved": {
          "status": "ImportOwnerStatus.unowned",
          "address": []
        },
        "possible_resolve": [
          [
            "src/python/pants/backend/helm/subsystems:yamlpath",
            "helm-post-renderer"
          ]
        ]
      }
    ],
    "assets": [
      {
        "name": "src/python/configs/prod.json",
        "reference": "src/python/configs/prod.json",
        "resolved": {
          "status": "ImportOwnerStatus.unowned",
          "address": []
        },
        "possible_resolve": null
      },
      {
        "name": "testprojects/pants-plugins/src/python/test_pants_plugin/__init__.py",
        "reference": "testprojects/pants-plugins/src/python/test_pants_plugin/__init__.py",
        "resolved": {
          "status": "ImportOwnerStatus.unambiguous",
          "address": [
            "testprojects/pants-plugins/src/python/test_pants_plugin/__init__.py:../../../../pants_plugins_directory"
          ]
        },
        "possible_resolve": null
      }
    ]
  }
}
```
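(For reference, output like the above is printed by the new goal. An invocation along the following lines should reproduce it; the goal and option names are taken from `debug_goals.py` in this patch, but the exact flag spelling and target spec are illustrative, and the goal must be registered, e.g. via the `pants.backend.experimental.python` backend.)

```
# Per-import resolution results (the JSON shown above):
./pants python-dump-source-analysis src/python/pants/backend/python/dependency_inference/t.py

# Raw intermediate results of every inference stage (the "more advanced mode" described below):
./pants python-dump-source-analysis --python-dump-source-analysis-analysis-flavor=raw_dependency_inference ::
```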
This tells you, for each file and for each import, which dependencies Pants thought the import could resolve to, and what it decided to do with them.

This reuses almost all of the same code as the main dependency inference path; only the top-level orchestration differs, which leaves roughly 100 lines of semi-duplicated code. I think that's acceptably close.

There is also a more advanced mode that dumps the raw intermediate results of each stage of the inference process. That mode is probably useful for people digging into the dependency inference machinery rather than for end users, but we get it for free.

Fixes #17039.

---

Dependency inference is fairly performance-critical, so here are benchmarks (with a comparison-of-means t-test):

| command | main | this PR | difference | p-value |
| --- | --- | --- | --- | --- |
| `hyperfine --runs=10 './pants --no-pantsd dependencies --transitive ::'` | 21.839 s ± 0.326 s | 22.142 s ± 0.283 s | 1.38% | 0.0395 |
| `hyperfine --warmup=1 --runs=10 './pants dependencies --transitive ::'` | 1.798 s ± 0.074 s | 1.811 s ± 0.076 s | 0.72% | 0.7029 |
| `hyperfine --runs=10 './pants --no-pantsd dependencies ::'` | 21.547 s ± 0.640 s | 21.863 s ± 1.072 s | 1.47% | 0.4339 |
| `hyperfine --warmup=1 --runs=10 './pants dependencies ::'` | 1.828 s ± 0.091 s | 1.844 s ± 0.105 s | 0.88% | 0.7200 |

So it looks like this PR may add roughly a 1% performance overhead, although only the first of those p-values is below 0.05 and the rest are far from significant. Let me know if we want to increase the number of runs for better statistics; I have re-run the benchmarks a few times and the numbers look about right, so I think we can proceed with the review under the assumption that there is currently a ~1% overhead. I'm open to suggestions for improving performance.
---
 .../backend/experimental/python/register.py   |   3 +-
 src/python/pants/backend/project_info/peek.py |  13 +-
 .../dependency_inference/module_mapper.py     |   6 +-
 .../parse_python_dependencies.py              |   1 -
 .../python/dependency_inference/rules.py      | 297 +++++++++++++-----
 .../python/dependency_inference/rules_test.py | 272 +++++++++++++++-
 .../pants/backend/python/goals/debug_goals.py | 198 ++++++++++++
 .../backend/python/goals/debug_goals_test.py  | 141 +++++++++
 8 files changed, 851 insertions(+), 80 deletions(-)
 create mode 100644 src/python/pants/backend/python/goals/debug_goals.py
 create mode 100644 src/python/pants/backend/python/goals/debug_goals_test.py

diff --git a/src/python/pants/backend/experimental/python/register.py b/src/python/pants/backend/experimental/python/register.py
index 20db953dcaa..07b4e2bd8b7 100644
--- a/src/python/pants/backend/experimental/python/register.py
+++ b/src/python/pants/backend/experimental/python/register.py
@@ -2,7 +2,7 @@
 # Licensed under the Apache License, Version 2.0 (see LICENSE).
from pants.backend.codegen import export_codegen_goal -from pants.backend.python.goals import publish +from pants.backend.python.goals import debug_goals, publish from pants.backend.python.subsystems import setuptools_scm, twine from pants.backend.python.target_types import VCSVersion from pants.backend.python.util_rules import pex, vcs_versioning @@ -16,6 +16,7 @@ def rules(): *setuptools_scm.rules(), *export_codegen_goal.rules(), *twine.rules(), + *debug_goals.rules(), ) diff --git a/src/python/pants/backend/project_info/peek.py b/src/python/pants/backend/project_info/peek.py index 8da464f4616..b68752b45ca 100644 --- a/src/python/pants/backend/project_info/peek.py +++ b/src/python/pants/backend/project_info/peek.py @@ -15,6 +15,7 @@ from pants.engine.target import ( Dependencies, DependenciesRequest, + Field, HydratedSources, HydrateSourcesRequest, SourcesField, @@ -89,12 +90,22 @@ class _PeekJsonEncoder(json.JSONEncoder): def default(self, o): """Return a serializable object for o.""" + if isinstance(o, str): # early exit prevents strings from being treated as sequences + return o + if o is None: + return o if is_dataclass(o): return asdict(o) if isinstance(o, collections.abc.Mapping): return dict(o) - if isinstance(o, collections.abc.Sequence): + if ( + isinstance(o, collections.abc.Sequence) + or isinstance(o, set) + or isinstance(o, collections.abc.Set) + ): return list(o) + if isinstance(o, Field): + return self.default(o.value) try: return super().default(o) except TypeError: diff --git a/src/python/pants/backend/python/dependency_inference/module_mapper.py b/src/python/pants/backend/python/dependency_inference/module_mapper.py index d0202a60046..d80e8b82a5a 100644 --- a/src/python/pants/backend/python/dependency_inference/module_mapper.py +++ b/src/python/pants/backend/python/dependency_inference/module_mapper.py @@ -384,8 +384,10 @@ def add_modules(modules: Iterable[str], *, type_stub: bool = False) -> None: class PythonModuleOwners: """The target(s) that own a Python module. - If >1 targets own the same module, and they're implementations (vs .pyi type stubs), they will - be put into `ambiguous` instead of `unambiguous`. `unambiguous` should never be > 2. + Up to 2 targets can unambiguously own the same module, if one is an implementation and the other + is a .pyi type stub. It is ambiguous for >1 implementation target to own the same module, and + those targets will be put into `ambiguous` instead of `unambiguous`. Therefore, `unambiguous` + should never be >2; and only 1 of `unambiguous` and `ambiguous` should have targets. """ unambiguous: tuple[Address, ...] diff --git a/src/python/pants/backend/python/dependency_inference/parse_python_dependencies.py b/src/python/pants/backend/python/dependency_inference/parse_python_dependencies.py index 96b897397b9..66e91818aa4 100644 --- a/src/python/pants/backend/python/dependency_inference/parse_python_dependencies.py +++ b/src/python/pants/backend/python/dependency_inference/parse_python_dependencies.py @@ -1,6 +1,5 @@ # Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). 
- import json from dataclasses import dataclass diff --git a/src/python/pants/backend/python/dependency_inference/rules.py b/src/python/pants/backend/python/dependency_inference/rules.py index 50f5c7219a5..c1ae9274c52 100644 --- a/src/python/pants/backend/python/dependency_inference/rules.py +++ b/src/python/pants/backend/python/dependency_inference/rules.py @@ -5,11 +5,10 @@ import itertools import logging -from collections import defaultdict from dataclasses import dataclass from enum import Enum from pathlib import PurePath -from typing import DefaultDict, Iterable, Iterator +from typing import Dict, Iterable, Optional from pants.backend.python.dependency_inference import module_mapper, parse_python_dependencies from pants.backend.python.dependency_inference.default_unowned_dependencies import ( @@ -222,8 +221,8 @@ def _get_inferred_asset_deps( assets_by_path: AllAssetTargetsByPath, assets: ParsedPythonAssetPaths, explicitly_provided_deps: ExplicitlyProvidedDependencies, -) -> Iterator[Address]: - for filepath in assets: +) -> dict[str, ImportResolveResult]: + def _resolve_single_asset(filepath) -> ImportResolveResult: # NB: Resources in Python's ecosystem are loaded relative to a package, so we only try and # query for a resource relative to requesting module's path # (I.e. we assume the user is doing something like `pkgutil.get_data(__file__, "foo/bar")`) @@ -245,6 +244,9 @@ def _get_inferred_asset_deps( if inferred_tgts: possible_addresses = tuple(tgt.address for tgt in inferred_tgts) + if len(possible_addresses) == 1: + return ImportResolveResult(ImportOwnerStatus.unambiguous, possible_addresses) + explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference( possible_addresses, address, @@ -253,7 +255,28 @@ def _get_inferred_asset_deps( ) maybe_disambiguated = explicitly_provided_deps.disambiguated(possible_addresses) if maybe_disambiguated: - yield maybe_disambiguated + return ImportResolveResult(ImportOwnerStatus.disambiguated, (maybe_disambiguated,)) + else: + return ImportResolveResult(ImportOwnerStatus.ambiguous) + else: + return ImportResolveResult(ImportOwnerStatus.unowned) + + return {filepath: _resolve_single_asset(filepath) for filepath in assets} + + +class ImportOwnerStatus(Enum): + unambiguous = "unambiguous" + disambiguated = "disambiguated" + ambiguous = "ambiguous" + unowned = "unowned" + weak_ignore = "weak_ignore" + unownable = "unownable" + + +@dataclass(frozen=True) +class ImportResolveResult: + status: ImportOwnerStatus + address: tuple[Address, ...] 
= () def _get_imports_info( @@ -261,30 +284,118 @@ def _get_imports_info( owners_per_import: Iterable[PythonModuleOwners], parsed_imports: ParsedPythonImports, explicitly_provided_deps: ExplicitlyProvidedDependencies, -) -> tuple[set[Address], set[str]]: - inferred_deps: set[Address] = set() - unowned_imports: set[str] = set() +) -> dict[str, ImportResolveResult]: + def _resolve_single_import(owners, import_name) -> ImportResolveResult: + if owners.unambiguous: + return ImportResolveResult(ImportOwnerStatus.unambiguous, owners.unambiguous) - for owners, imp in zip(owners_per_import, parsed_imports): - inferred_deps.update(owners.unambiguous) explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference( owners.ambiguous, address, import_reference="module", - context=f"The target {address} imports `{imp}`", + context=f"The target {address} imports `{import_name}`", ) maybe_disambiguated = explicitly_provided_deps.disambiguated(owners.ambiguous) if maybe_disambiguated: - inferred_deps.add(maybe_disambiguated) - + return ImportResolveResult(ImportOwnerStatus.disambiguated, (maybe_disambiguated,)) + elif import_name.split(".")[0] in DEFAULT_UNOWNED_DEPENDENCIES: + return ImportResolveResult(ImportOwnerStatus.unownable) + elif parsed_imports[import_name].weak: + return ImportResolveResult(ImportOwnerStatus.weak_ignore) + else: + return ImportResolveResult(ImportOwnerStatus.unowned) + + return { + imp: _resolve_single_import(owners, imp) + for owners, (imp, inf) in zip(owners_per_import, parsed_imports.items()) + } + + +def _collect_imports_info( + resolve_result: dict[str, ImportResolveResult] +) -> tuple[frozenset[Address], frozenset[str]]: + """Collect import resolution results into: + + - imports (direct and disambiguated) + - unowned + """ + + return frozenset( + addr + for dep in resolve_result.values() + for addr in dep.address if ( - not owners.unambiguous - and imp.split(".")[0] not in DEFAULT_UNOWNED_DEPENDENCIES - and not parsed_imports[imp].weak - ): - unowned_imports.add(imp) + dep.status == ImportOwnerStatus.unambiguous + or dep.status == ImportOwnerStatus.disambiguated + ) + ), frozenset( + imp for imp, dep in resolve_result.items() if dep.status == ImportOwnerStatus.unowned + ) + - return inferred_deps, unowned_imports +@dataclass(frozen=True) +class UnownedImportsPossibleOwnersRequest: + """A request to find possible owners for several imports originating in a resolve.""" + + unowned_imports: frozenset[str] + original_resolve: str + + +@dataclass(frozen=True) +class UnownedImportPossibleOwnerRequest: + unowned_import: str + original_resolve: str + + +@dataclass(frozen=True) +class UnownedImportsPossibleOwners: + value: Dict[str, list[tuple[Address, ResolveName]]] + + +@dataclass(frozen=True) +class UnownedImportPossibleOwners: + value: list[tuple[Address, ResolveName]] + + +@rule_helper +async def _find_other_owners_for_unowned_imports( + req: UnownedImportsPossibleOwnersRequest, +) -> UnownedImportsPossibleOwners: + individual_possible_owners = await MultiGet( + Get(UnownedImportPossibleOwners, UnownedImportPossibleOwnerRequest(r, req.original_resolve)) + for r in req.unowned_imports + ) + + return UnownedImportsPossibleOwners( + { + imported_module: possible_owners.value + for imported_module, possible_owners in zip( + req.unowned_imports, individual_possible_owners + ) + if possible_owners.value + } + ) + + +@rule +async def find_other_owners_for_unowned_import( + req: UnownedImportPossibleOwnerRequest, + python_setup: PythonSetup, +) -> UnownedImportPossibleOwners: + 
other_owner_from_other_resolves = await Get( + PythonModuleOwners, PythonModuleOwnersRequest(req.unowned_import, resolve=None) + ) + + owners = other_owner_from_other_resolves + other_owners_as_targets = await Get(Targets, Addresses(owners.unambiguous + owners.ambiguous)) + + other_owners = [] + + for t in other_owners_as_targets: + other_owner_resolve = t[PythonResolveField].normalized_value(python_setup) + if other_owner_resolve != req.original_resolve: + other_owners.append((t.address, other_owner_resolve)) + return UnownedImportPossibleOwners(other_owners) @rule_helper @@ -292,7 +403,7 @@ async def _handle_unowned_imports( address: Address, unowned_dependency_behavior: UnownedDependencyUsage, python_setup: PythonSetup, - unowned_imports: Iterable[str], + unowned_imports: frozenset[str], parsed_imports: ParsedPythonImports, resolve: str, ) -> None: @@ -301,25 +412,11 @@ async def _handle_unowned_imports( other_resolves_snippet = "" if len(python_setup.resolves) > 1: - other_owners_from_other_resolves = await MultiGet( - Get(PythonModuleOwners, PythonModuleOwnersRequest(imported_module, resolve=None)) - for imported_module in unowned_imports - ) - other_owners_as_targets = await MultiGet( - Get(Targets, Addresses(owners.unambiguous + owners.ambiguous)) - for owners in other_owners_from_other_resolves - ) - - imports_to_other_owners: DefaultDict[str, list[tuple[Address, ResolveName]]] = defaultdict( - list - ) - for imported_module, targets in zip(unowned_imports, other_owners_as_targets): - for t in targets: - other_owner_resolve = t[PythonResolveField].normalized_value(python_setup) - if other_owner_resolve != resolve: - imports_to_other_owners[imported_module].append( - (t.address, other_owner_resolve) - ) + imports_to_other_owners = ( + await _find_other_owners_for_unowned_imports( + UnownedImportsPossibleOwnersRequest(unowned_imports, resolve), + ) + ).value if imports_to_other_owners: other_resolves_lines = [] @@ -359,23 +456,19 @@ async def _handle_unowned_imports( raise UnownedDependencyError(msg) -@rule(desc="Inferring Python dependencies by analyzing source") -async def infer_python_dependencies_via_source( - request: InferPythonImportDependencies, +@rule_helper +async def _exec_parse_deps( + field_set: PythonImportDependenciesInferenceFieldSet, python_infer_subsystem: PythonInferSubsystem, python_setup: PythonSetup, -) -> InferredDependencies: - if not python_infer_subsystem.imports and not python_infer_subsystem.assets: - return InferredDependencies([]) - - address = request.field_set.address +) -> ParsedPythonDependencies: interpreter_constraints = InterpreterConstraints.create_from_compatibility_fields( - [request.field_set.interpreter_constraints], python_setup + [field_set.interpreter_constraints], python_setup ) - parsed_dependencies = await Get( + resp = await Get( ParsedPythonDependencies, ParsePythonDependenciesRequest( - request.field_set.source, + field_set.source, interpreter_constraints, string_imports=python_infer_subsystem.string_imports, string_imports_min_dots=python_infer_subsystem.string_imports_min_dots, @@ -383,11 +476,32 @@ async def infer_python_dependencies_via_source( assets_min_slashes=python_infer_subsystem.assets_min_slashes, ), ) + return resp + + +@dataclass(frozen=True) +class ResolvedParsedPythonDependenciesRequest: + field_set: PythonImportDependenciesInferenceFieldSet + parsed_dependencies: ParsedPythonDependencies + resolve: Optional[str] + + +@dataclass(frozen=True) +class ResolvedParsedPythonDependencies: + resolve_results: dict[str, 
ImportResolveResult] + assets: dict[str, ImportResolveResult] + explicit: ExplicitlyProvidedDependencies + + +@rule +async def resolve_parsed_dependencies( + request: ResolvedParsedPythonDependenciesRequest, + python_infer_subsystem: PythonInferSubsystem, +) -> ResolvedParsedPythonDependencies: + """Find the owning targets for the parsed dependencies.""" - inferred_deps: set[Address] = set() - unowned_imports: set[str] = set() - parsed_imports = parsed_dependencies.imports - parsed_assets = parsed_dependencies.assets + parsed_imports = request.parsed_dependencies.imports + parsed_assets = request.parsed_dependencies.assets if not python_infer_subsystem.imports: parsed_imports = ParsedPythonImports([]) @@ -395,39 +509,74 @@ async def infer_python_dependencies_via_source( ExplicitlyProvidedDependencies, DependenciesRequest(request.field_set.dependencies) ) - resolve = request.field_set.resolve.normalized_value(python_setup) - if parsed_imports: - import_deps, unowned_imports = _get_imports_info( - address=address, - owners_per_import=await MultiGet( - Get(PythonModuleOwners, PythonModuleOwnersRequest(imported_module, resolve=resolve)) - for imported_module in parsed_imports - ), + owners_per_import = await MultiGet( + Get( + PythonModuleOwners, + PythonModuleOwnersRequest(imported_module, resolve=request.resolve), + ) + for imported_module in parsed_imports + ) + resolve_results = _get_imports_info( + address=request.field_set.address, + owners_per_import=owners_per_import, parsed_imports=parsed_imports, explicitly_provided_deps=explicitly_provided_deps, ) - inferred_deps.update(import_deps) + else: + resolve_results = {} if parsed_assets: all_asset_targets = await Get(AllAssetTargets, AllAssetTargetsRequest()) assets_by_path = await Get(AllAssetTargetsByPath, AllAssetTargets, all_asset_targets) - inferred_deps.update( - _get_inferred_asset_deps( - address, - request.field_set.source.file_path, - assets_by_path, - parsed_assets, - explicitly_provided_deps, - ) + asset_deps = _get_inferred_asset_deps( + request.field_set.address, + request.field_set.source.file_path, + assets_by_path, + parsed_assets, + explicitly_provided_deps, ) + else: + asset_deps = {} + + return ResolvedParsedPythonDependencies( + resolve_results=resolve_results, + assets=asset_deps, + explicit=explicitly_provided_deps, + ) + + +@rule(desc="Inferring Python dependencies by analyzing source") +async def infer_python_dependencies_via_source( + request: InferPythonImportDependencies, + python_infer_subsystem: PythonInferSubsystem, + python_setup: PythonSetup, +) -> InferredDependencies: + if not python_infer_subsystem.imports and not python_infer_subsystem.assets: + return InferredDependencies([]) + + parsed_dependencies = await _exec_parse_deps( + request.field_set, python_infer_subsystem, python_setup + ) + + resolve = request.field_set.resolve.normalized_value(python_setup) + + resolved_dependencies = await Get( + ResolvedParsedPythonDependencies, + ResolvedParsedPythonDependenciesRequest(request.field_set, parsed_dependencies, resolve), + ) + import_deps, unowned_imports = _collect_imports_info(resolved_dependencies.resolve_results) + + asset_deps, unowned_assets = _collect_imports_info(resolved_dependencies.assets) + + inferred_deps = import_deps | asset_deps _ = await _handle_unowned_imports( - address, + request.field_set.address, python_infer_subsystem.unowned_dependency_behavior, python_setup, unowned_imports, - parsed_imports, + parsed_dependencies.imports, resolve=resolve, ) @@ -531,6 +680,8 @@ async def 
infer_python_conftest_dependencies( # This is a separate function to facilitate tests registering import inference. def import_rules(): return [ + resolve_parsed_dependencies, + find_other_owners_for_unowned_import, infer_python_dependencies_via_source, *pex.rules(), *parse_python_dependencies.rules(), diff --git a/src/python/pants/backend/python/dependency_inference/rules_test.py b/src/python/pants/backend/python/dependency_inference/rules_test.py index 1438a04c006..903e2bb05e8 100644 --- a/src/python/pants/backend/python/dependency_inference/rules_test.py +++ b/src/python/pants/backend/python/dependency_inference/rules_test.py @@ -4,12 +4,20 @@ from __future__ import annotations from textwrap import dedent +from typing import Iterable import pytest from pants.backend.python import target_types_rules +from pants.backend.python.dependency_inference.module_mapper import PythonModuleOwners +from pants.backend.python.dependency_inference.parse_python_dependencies import ( + ParsedPythonImportInfo, + ParsedPythonImports, +) from pants.backend.python.dependency_inference.rules import ( ConftestDependenciesInferenceFieldSet, + ImportOwnerStatus, + ImportResolveResult, InferConftestDependencies, InferInitDependencies, InferPythonImportDependencies, @@ -19,6 +27,10 @@ PythonInferSubsystem, UnownedDependencyError, UnownedDependencyUsage, + UnownedImportsPossibleOwners, + UnownedImportsPossibleOwnersRequest, + _find_other_owners_for_unowned_imports, + _get_imports_info, import_rules, infer_python_conftest_dependencies, infer_python_init_dependencies, @@ -37,9 +49,10 @@ from pants.core.target_types import rules as core_target_types_rules from pants.engine.addresses import Address from pants.engine.internals.parametrize import Parametrize -from pants.engine.rules import SubsystemRule -from pants.engine.target import InferredDependencies +from pants.engine.rules import SubsystemRule, rule +from pants.engine.target import ExplicitlyProvidedDependencies, InferredDependencies from pants.testutil.rule_runner import PYTHON_BOOTSTRAP_ENV, QueryRule, RuleRunner, engine_error +from pants.util.ordered_set import FrozenOrderedSet from pants.util.strutil import softwrap @@ -461,8 +474,13 @@ def run_dep_inference(address: Address) -> InferredDependencies: @pytest.fixture def imports_rule_runner() -> RuleRunner: + return mk_imports_rule_runner([]) + + +def mk_imports_rule_runner(more_rules: Iterable) -> RuleRunner: return RuleRunner( rules=[ + *more_rules, *import_rules(), *target_types_rules.rules(), *core_target_types_rules(), @@ -627,3 +645,253 @@ def test_infer_python_strict_multiple_resolves(imports_rule_runner: RuleRunner) InferredDependencies, [InferPythonImportDependencies(PythonImportDependenciesInferenceFieldSet.create(tgt))], ) + + +class TestCategoriseImportsInfo: + address = Address("sample/path") + import_cases = { + "unambiguous": ( + ParsedPythonImportInfo(0, False), + PythonModuleOwners((Address("unambiguous.py"),)), + ), + "unambiguous_with_pyi": ( + ParsedPythonImportInfo(0, False), + PythonModuleOwners( + ( + Address("unambiguous_with_pyi.py"), + Address("unambiguous_with_pyi.pyi"), + ) + ), + ), + "ambiguous_disambiguatable": ( + ParsedPythonImportInfo(0, False), + PythonModuleOwners( + tuple(), + ( + Address("ambiguous_disambiguatable", target_name="good"), + Address("ambiguous_disambiguatable", target_name="bad"), + ), + ), + ), + "ambiguous_terminal": ( + ParsedPythonImportInfo(0, False), + PythonModuleOwners( + tuple(), + ( + Address("ambiguous_disambiguatable", target_name="bad0"), + 
Address("ambiguous_disambiguatable", target_name="bad1"), + ), + ), + ), + "json": ( + ParsedPythonImportInfo(0, False), + PythonModuleOwners(tuple()), + ), # unownable + "os.path": ( + ParsedPythonImportInfo(0, False), + PythonModuleOwners(tuple()), + ), # unownable, not root module + "weak_owned": ( + ParsedPythonImportInfo(0, True), + PythonModuleOwners((Address("weak_owned.py"),)), + ), + "weak_unowned": ( + ParsedPythonImportInfo(0, True), + PythonModuleOwners(tuple()), + ), + "unowned": ( + ParsedPythonImportInfo(0, False), + PythonModuleOwners(tuple()), + ), + } + + def filter_case(self, case_name: str, cases=None): + cases = cases or self.import_cases + return {case_name: cases[case_name]} + + def separate_owners_and_imports( + self, + imports_to_owners: dict[str, tuple[ParsedPythonImportInfo, PythonModuleOwners]], + ) -> tuple[list[PythonModuleOwners], ParsedPythonImports]: + owners_per_import = [x[1] for x in imports_to_owners.values()] + parsed_imports = ParsedPythonImports({k: v[0] for k, v in imports_to_owners.items()}) + return owners_per_import, parsed_imports + + def do_test(self, case_name: str, expected_status: ImportOwnerStatus) -> ImportResolveResult: + owners_per_import, parsed_imports = self.separate_owners_and_imports( + self.filter_case(case_name) + ) + resolve_result = _get_imports_info( + self.address, + owners_per_import, + parsed_imports, + ExplicitlyProvidedDependencies( + self.address, + FrozenOrderedSet(), + FrozenOrderedSet((Address("ambiguous_disambiguatable", target_name="bad"),)), + ), + ) + + assert len(resolve_result) == 1 and case_name in resolve_result + resolved = resolve_result[case_name] + assert resolved.status == expected_status + return resolved + + def test_unambiguous_imports(self, imports_rule_runner: RuleRunner) -> None: + case_name = "unambiguous" + resolved = self.do_test(case_name, ImportOwnerStatus.unambiguous) + assert resolved.address == self.import_cases[case_name][1].unambiguous + + def test_unambiguous_with_pyi(self, imports_rule_runner: RuleRunner) -> None: + case_name = "unambiguous_with_pyi" + resolved = self.do_test(case_name, ImportOwnerStatus.unambiguous) + assert resolved.address == self.import_cases[case_name][1].unambiguous + + def test_unownable_root(self, imports_rule_runner: RuleRunner) -> None: + case_name = "json" + self.do_test(case_name, ImportOwnerStatus.unownable) + + def test_unownable_nonroot(self, imports_rule_runner: RuleRunner) -> None: + case_name = "os.path" + self.do_test(case_name, ImportOwnerStatus.unownable) + + def test_weak_owned(self, imports_rule_runner: RuleRunner) -> None: + case_name = "weak_owned" + resolved = self.do_test(case_name, ImportOwnerStatus.unambiguous) + assert resolved.address == self.import_cases[case_name][1].unambiguous + + def test_weak_unowned(self, imports_rule_runner: RuleRunner) -> None: + case_name = "weak_unowned" + resolved = self.do_test(case_name, ImportOwnerStatus.weak_ignore) + assert resolved.address == tuple() + + def test_unowned(self, imports_rule_runner: RuleRunner) -> None: + case_name = "unowned" + resolved = self.do_test(case_name, ImportOwnerStatus.unowned) + assert resolved.address == tuple() + + def test_ambiguous_disambiguatable(self): + case_name = "ambiguous_disambiguatable" + resolved = self.do_test(case_name, ImportOwnerStatus.disambiguated) + assert resolved.address == (self.import_cases[case_name][1].ambiguous[0],) + + def test_ambiguous_not_disambiguatable(self): + case_name = "ambiguous_terminal" + resolved = self.do_test(case_name, 
ImportOwnerStatus.unowned) + assert resolved.address == () + + +class TestFindOtherOwners: + missing_import_name = "missing" + other_resolve = "other-resolve" + other_other_resolve = "other-other-resolve" + + @staticmethod + @rule + async def run_rule( + req: UnownedImportsPossibleOwnersRequest, + ) -> UnownedImportsPossibleOwners: + return await _find_other_owners_for_unowned_imports(req) + + @pytest.fixture + def _imports_rule_runner(self): + return mk_imports_rule_runner( + [ + self.run_rule, + QueryRule(UnownedImportsPossibleOwners, [UnownedImportsPossibleOwnersRequest]), + ] + ) + + def do_test(self, imports_rule_runner: RuleRunner): + resolves = {"python-default": "", self.other_resolve: "", self.other_other_resolve: ""} + imports_rule_runner.set_options( + [ + "--python-enable-resolves", + f"--python-resolves={resolves}", + ] + ) + + imports_rule_runner.write_files( + { + "project/cheesey.py": dedent( + f"""\ + import other.{self.missing_import_name} + """ + ), + "project/BUILD": "python_sources()", + } + ) + + return imports_rule_runner.request( + UnownedImportsPossibleOwners, + [ + UnownedImportsPossibleOwnersRequest( + frozenset((f"other.{self.missing_import_name}",)), "original_resolve" + ) + ], + ) + + def test_no_other_owners_found(self, _imports_rule_runner): + r = self.do_test(_imports_rule_runner) + assert not r.value + + def test_other_owners_found_in_single_resolve(self, _imports_rule_runner: RuleRunner): + + _imports_rule_runner.write_files( + { + "other/BUILD": dedent( + f"""\ + python_source( + name="{self.missing_import_name}", + source="{self.missing_import_name}.py", + resolve="{self.other_resolve}", + ) + """ + ), + f"other/{self.missing_import_name}.py": "", + } + ) + + r = self.do_test(_imports_rule_runner) + + as_module = f"other.{self.missing_import_name}" + assert as_module in r.value + assert r.value[as_module] == [ + ( + Address("other", target_name=self.missing_import_name), + self.other_resolve, + ) + ] + + def test_other_owners_found_in_multiple_resolves(self, _imports_rule_runner: RuleRunner): + + _imports_rule_runner.write_files( + { + "other/BUILD": dedent( + f"""\ + python_source( + name="{self.missing_import_name}", + source="{self.missing_import_name}.py", + resolve=parametrize("{self.other_resolve}", "{self.other_other_resolve}"), + ) + """ + ), + f"other/{self.missing_import_name}.py": "", + } + ) + + r = self.do_test(_imports_rule_runner) + + as_module = f"other.{self.missing_import_name}" + assert as_module in r.value + assert r.value[as_module] == [ + ( + Address( + "other", + target_name=self.missing_import_name, + parameters={"resolve": resolve}, + ), + resolve, + ) + for resolve in (self.other_other_resolve, self.other_resolve) + ] diff --git a/src/python/pants/backend/python/goals/debug_goals.py b/src/python/pants/backend/python/goals/debug_goals.py new file mode 100644 index 00000000000..a5c8daf6fc2 --- /dev/null +++ b/src/python/pants/backend/python/goals/debug_goals.py @@ -0,0 +1,198 @@ +# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). 
+ +from __future__ import annotations + +import json +from dataclasses import dataclass +from enum import Enum +from typing import Any, Optional, Union + +from pants.backend.project_info.peek import _PeekJsonEncoder +from pants.backend.python.dependency_inference.module_mapper import ResolveName +from pants.backend.python.dependency_inference.parse_python_dependencies import ( + ParsedPythonDependencies, + ParsedPythonImportInfo, +) +from pants.backend.python.dependency_inference.rules import ( + ImportResolveResult, + PythonImportDependenciesInferenceFieldSet, + PythonInferSubsystem, + ResolvedParsedPythonDependencies, + ResolvedParsedPythonDependenciesRequest, + UnownedImportsPossibleOwners, + UnownedImportsPossibleOwnersRequest, + _collect_imports_info, + _exec_parse_deps, + _find_other_owners_for_unowned_imports, + import_rules, +) +from pants.backend.python.goals.run_python_source import PythonSourceFieldSet +from pants.backend.python.subsystems.setup import PythonSetup +from pants.build_graph.address import Address +from pants.engine.console import Console +from pants.engine.goal import Goal, GoalSubsystem +from pants.engine.internals.selectors import Get, MultiGet +from pants.engine.rules import collect_rules, goal_rule, rule +from pants.engine.target import Targets +from pants.option.option_types import EnumOption +from pants.util.strutil import softwrap + + +class AnalysisFlavor(Enum): + raw_dependency_inference = "raw_dependency_inference" + dependency_inference = "dependency_inference" + + +class DumpPythonSourceAnalysisSubsystem(GoalSubsystem): + name = "python-dump-source-analysis" + help = "Dump source analysis for python_source targets." + + flavor = EnumOption( + "--analysis-flavor", + default=AnalysisFlavor.dependency_inference, + help=softwrap( + f"""\ + The type of information that should be returned.\n + * `{AnalysisFlavor.dependency_inference.value}`: The results of dependency inference, for every detected import in every file.\n + * `{AnalysisFlavor.raw_dependency_inference.value}`: The raw intermediate results of the dependency inference process, + at every stage they're available. 
+ Potentially useful for debugging the dependency inference process.\n + """ + ), + ) + + +class DumpPythonSourceAnalysis(Goal): + subsystem_cls = DumpPythonSourceAnalysisSubsystem + + +@dataclass(frozen=True) +class PythonSourceAnalysis: + """Information on the inferred imports for a Python file, including all raw intermediate + results.""" + + fs: PythonImportDependenciesInferenceFieldSet + identified: ParsedPythonDependencies + resolved: ResolvedParsedPythonDependencies + possible_owners: UnownedImportsPossibleOwners + + +@rule +async def dump_python_source_analysis_single( + fs: PythonImportDependenciesInferenceFieldSet, + python_infer_subsystem: PythonInferSubsystem, + python_setup: PythonSetup, +) -> PythonSourceAnalysis: + """Infer the dependencies for a single python fieldset, keeping all the intermediate results.""" + + parsed_dependencies = await _exec_parse_deps(fs, python_infer_subsystem, python_setup) + + resolve = fs.resolve.normalized_value(python_setup) + + resolved_dependencies = await Get( + ResolvedParsedPythonDependencies, + ResolvedParsedPythonDependenciesRequest(fs, parsed_dependencies, resolve), + ) + + import_deps, unowned_imports = _collect_imports_info(resolved_dependencies.resolve_results) + + imports_to_other_owners = await _find_other_owners_for_unowned_imports( + UnownedImportsPossibleOwnersRequest(unowned_imports, resolve), + ) + + return PythonSourceAnalysis( + fs, parsed_dependencies, resolved_dependencies, imports_to_other_owners + ) + + +@dataclass(frozen=True) +class ImportAnalysis: + """Information on the inferred imports for a Python file.""" + + name: str + reference: Union[ParsedPythonImportInfo, str] + resolved: ImportResolveResult + possible_resolve: Optional[list[tuple[Address, ResolveName]]] + + +@dataclass(frozen=True) +class CollectedImportAnalysis: + """Collected information on all Python files.""" + + imports: list[ImportAnalysis] + assets: list[ImportAnalysis] + + +def collect_analysis(raw: PythonSourceAnalysis) -> CollectedImportAnalysis: + """Collect raw analysis and present it in a helpful per-import format.""" + imports = [] + + resolved_results = raw.resolved.resolve_results + + for name, info in raw.identified.imports.items(): + possible_resolve = raw.possible_owners.value.get(name) + + imports.append( + ImportAnalysis( + name=name, + reference=info, + resolved=resolved_results[name], + possible_resolve=possible_resolve, + ) + ) + + assets = [] + resolved_assets = raw.resolved.assets + + for name in raw.identified.assets: + possible_resolve = raw.possible_owners.value.get(name) + + assets.append( + ImportAnalysis( + name=name, + reference=name, # currently assets don't keep track of their line numbers + resolved=resolved_assets[name], + possible_resolve=possible_resolve, + ) + ) + + return CollectedImportAnalysis(imports, assets) + + +@goal_rule +async def dump_python_source_analysis( + request: DumpPythonSourceAnalysisSubsystem, + targets: Targets, + console: Console, +) -> DumpPythonSourceAnalysis: + source_field_sets = [ + PythonImportDependenciesInferenceFieldSet.create(tgt) + for tgt in targets + if PythonSourceFieldSet.is_applicable(tgt) + ] + + source_analysis = await MultiGet( + Get( + PythonSourceAnalysis, + PythonImportDependenciesInferenceFieldSet, + fs, + ) + for fs in source_field_sets + ) + + output: Any + if request.flavor == AnalysisFlavor.raw_dependency_inference: + output = source_analysis + else: + output = {str(a.fs.address): collect_analysis(a) for a in source_analysis} + + console.print_stdout(json.dumps(output, 
cls=_PeekJsonEncoder)) + return DumpPythonSourceAnalysis(exit_code=0) + + +def rules(): + return [ + *import_rules(), + *collect_rules(), + ] diff --git a/src/python/pants/backend/python/goals/debug_goals_test.py b/src/python/pants/backend/python/goals/debug_goals_test.py new file mode 100644 index 00000000000..c149377dca1 --- /dev/null +++ b/src/python/pants/backend/python/goals/debug_goals_test.py @@ -0,0 +1,141 @@ +# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +from __future__ import annotations + +from pathlib import Path +from textwrap import dedent + +import pytest + +from pants.backend.python import target_types_rules +from pants.backend.python.dependency_inference.rules import ( + ImportOwnerStatus, + PythonImportDependenciesInferenceFieldSet, + import_rules, +) +from pants.backend.python.goals import debug_goals +from pants.backend.python.goals.debug_goals import PythonSourceAnalysis +from pants.backend.python.macros import python_requirements +from pants.backend.python.macros.python_requirements import PythonRequirementsTargetGenerator +from pants.backend.python.target_types import ( + PythonRequirementTarget, + PythonSourcesGeneratorTarget, + PythonSourceTarget, +) +from pants.build_graph.address import Address +from pants.core.target_types import FileTarget +from pants.core.target_types import rules as core_target_types_rules +from pants.engine.internals.parametrize import Parametrize +from pants.testutil.rule_runner import QueryRule, RuleRunner + + +@pytest.fixture +def imports_rule_runner() -> RuleRunner: + resolves = {"python-default": "", "other": ""} + + rule_runner = RuleRunner( + rules=[ + *import_rules(), + *target_types_rules.rules(), + *core_target_types_rules(), + *python_requirements.rules(), + *debug_goals.rules(), + QueryRule(PythonSourceAnalysis, [PythonImportDependenciesInferenceFieldSet]), + ], + target_types=[ + PythonSourceTarget, + PythonSourcesGeneratorTarget, + PythonRequirementTarget, + PythonRequirementsTargetGenerator, + FileTarget, + ], + objects={"parametrize": Parametrize}, + ) + rule_runner.set_options( + [ + "--python-infer-assets", + "--python-enable-resolves", + f"--python-resolves={resolves}", + ], + env_inherit={"PATH", "PYENV_ROOT", "HOME"}, + ) + return rule_runner + + +def test_debug_goals(imports_rule_runner: RuleRunner): + filedir = "project" + filename = "t.py" + + imports_rule_runner.write_files( + { + str(Path(filedir, filename)): dedent( + f"""\ + import json # unownable, root level + import os.path # unownable, not root level + + import stuff # dependency missing + import watchdog # dependency included in other resolve + import yaml # dependency included + + try: + import weakimport # weakimport missing + except ImportError: + ... 
+ + open("missing.json") + # missing asset + open("{filedir}/config.json") + # asset + """ + ), + str(Path(filedir, "BUILD")): dedent( + f"""\ + python_source( + name="t", + source="t.py", + dependencies=["//{filedir}:config"], + resolve="python-default", + ) + + file( + name="config", + source="config.json", + ) + + python_requirement( + name="imported", + requirements=["pyyaml"], + ) + + python_requirement( + name="other", + requirements=["watchdog"], + resolve="other", + ) + """ + ), + str(Path(filedir, "config.json")): "", + } + ) + + tgt = imports_rule_runner.get_target(Address(filedir, target_name="t")) + + result = imports_rule_runner.request( + PythonSourceAnalysis, (PythonImportDependenciesInferenceFieldSet.create(tgt),) + ) + + assert result + assert len(result.identified.imports) == 6 + assert ( + len([i for i in result.identified.imports.values() if i.weak]) == 1 + ), "did not find the weak import" + assert len(result.identified.assets) == 1 + assert ( + result.resolved.assets[str(Path(filedir, "config.json"))].status + == ImportOwnerStatus.unambiguous + ) + + # possible owners + assert result.resolved.resolve_results["watchdog"].status == ImportOwnerStatus.unowned + assert result.possible_owners.value["watchdog"]