From 2127fb174ea762f8c000017aa8864a805c86850d Mon Sep 17 00:00:00 2001 From: Eric Arellano <14852634+Eric-Arellano@users.noreply.github.com> Date: Sat, 29 Jan 2022 14:59:20 -0700 Subject: [PATCH] Merge `validate` goal with `lint` goal (#14102) This adds generic support for `lint` implementations that do not deal with targets. That allows us to merge `validate` into `lint`, which is much cleaner. ## CLI specs As before with the `validate` goal, it's not very intuitive how to get Pants to run on files not owned by targets, which you want for `validate`. `::` only matches files owned by targets, whereas `**` matches _all_ files regardless of targets. So, users of `regex-lint` should typically use `./pants lint '**'` rather than `./pants lint ::`, which is not intuitive https://docs.google.com/document/d/1WWQM-X6kHoSCKwItqf61NiKFWNSlpnTC5QNu3ul9RDk/edit#heading=h.1h4j0d5mazhu proposes changing `::` to match all files, so you can simply use `./pants lint ::`. I don't think we need to block on this proposal? This is still forward progress, and also `validate`/`regex-lint` is not used very much fwict. ## Batching We don't yet batch per https://github.com/pantsbuild/pants/pull/14186, although it would be trivial for us to hook up. I'm only waiting to do it till we can better reason about if it makes sense to apply here too. ## The `fmt` goal Note that we need more design for `fmt` before we can apply this same change there. fmt is tricky because we run each formatter for a certain language sequentially so that they don't overwrite each other; but we run distinct languages in parallel. We would need some way to know which "language" target-less files are for. ## "Inferred targets" A related technology would be inferred targets, where you don't need a BUILD flie but we still have a target: https://github.com/pantsbuild/pants/issues/14074. This is a complementary technology. The main difference here is that we can operate on files that will _never_ have an owning target, such as a BUILD file itself. [ci skip-rust] [ci skip-build-wheels] --- .github/workflows/test-cron.yaml | 6 +- .github/workflows/test.yaml | 6 +- .../bin/generate_github_workflows.py | 4 +- .../pants/backend/project_info/regex_lint.py | 161 ++++++++++++++---- src/python/pants/core/goals/lint.py | 79 ++++++--- src/python/pants/core/goals/lint_test.py | 52 ++++-- .../pants/help/help_integration_test.py | 2 +- 7 files changed, 229 insertions(+), 81 deletions(-) diff --git a/.github/workflows/test-cron.yaml b/.github/workflows/test-cron.yaml index a1a72497e67..26e327f881f 100644 --- a/.github/workflows/test-cron.yaml +++ b/.github/workflows/test-cron.yaml @@ -340,11 +340,9 @@ jobs: ' - name: Lint - run: './pants validate ''**'' + run: './pants update-build-files --check - ./pants update-build-files --check - - ./pants lint check :: + ./pants lint check ''**'' ' - if: always() diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e6ccfb5a8c7..63b24c1fe3e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -515,11 +515,9 @@ jobs: ' - name: Lint - run: './pants validate ''**'' + run: './pants update-build-files --check - ./pants update-build-files --check - - ./pants lint check :: + ./pants lint check ''**'' ' - if: always() diff --git a/build-support/bin/generate_github_workflows.py b/build-support/bin/generate_github_workflows.py index 64322da78e8..d0863fe29af 100644 --- a/build-support/bin/generate_github_workflows.py +++ b/build-support/bin/generate_github_workflows.py @@ -380,9 +380,9 @@ def test_workflow_jobs(python_versions: list[str], *, cron: bool) -> Jobs: { "name": "Lint", "run": ( - "./pants validate '**'\n" "./pants update-build-files --check\n" - "./pants lint check ::\n" + # Note: we use `**` rather than `::` because regex-lint. + "./pants lint check '**'\n" ), }, upload_log_artifacts(name="lint"), diff --git a/src/python/pants/backend/project_info/regex_lint.py b/src/python/pants/backend/project_info/regex_lint.py index db5d4447331..495e72b11e9 100644 --- a/src/python/pants/backend/project_info/regex_lint.py +++ b/src/python/pants/backend/project_info/regex_lint.py @@ -10,15 +10,18 @@ from enum import Enum from typing import Any, cast -from pants.base.deprecated import resolve_conflicting_options +from pants.base.deprecated import resolve_conflicting_options, warn_or_error from pants.base.exiter import PANTS_FAILED_EXIT_CODE, PANTS_SUCCEEDED_EXIT_CODE +from pants.core.goals.lint import LintFilesRequest, LintResult, LintResults from pants.engine.collection import Collection from pants.engine.console import Console -from pants.engine.fs import Digest, DigestContents, SpecsSnapshot +from pants.engine.fs import Digest, DigestContents, PathGlobs, SpecsSnapshot from pants.engine.goal import Goal, GoalSubsystem -from pants.engine.rules import Get, collect_rules, goal_rule +from pants.engine.rules import Get, collect_rules, goal_rule, rule +from pants.engine.unions import UnionRule from pants.option.subsystem import Subsystem from pants.util.frozendict import FrozenDict +from pants.util.logging import LogLevel from pants.util.memo import memoized_method logger = logging.getLogger(__name__) @@ -43,7 +46,7 @@ class DetailLevel(Enum): class ValidateSubsystem(GoalSubsystem): name = "validate" - help = "Validate sources against regexes." + help = "Deprecated: to use, set `[regex-lint].config` and run with the `lint` goal" @classmethod def register_options(cls, register): @@ -94,7 +97,14 @@ def from_dict(cls, d: dict[str, Any]) -> ValidationConfig: class RegexLintSubsystem(Subsystem): options_scope = "regex-lint" - help = "Lint your code using regex patterns, e.g. to check for copyright headers." + help = ( + "Lint your code using regex patterns, e.g. to check for copyright headers.\n\n" + "To activate this with the `lint` goal, you must set `[regex-lint].config`.\n\n" + "Unlike other linters, this can run on files not owned by targets, such as BUILD files. To " + "run on those, use `lint '**'` rather than `lint ::`, for example. Unfortunately, " + "`--changed-since=` does not yet cause this linter to run. We are exploring how to " + "improve both these gotchas." + ) deprecated_options_scope = "sourcefile-validation" deprecated_options_scope_removal_version = "2.11.0.dev0" @@ -103,39 +113,47 @@ class RegexLintSubsystem(Subsystem): def register_options(cls, register): schema_help = textwrap.dedent( """\ - Config schema is as follows: - + ``` + { + 'required_matches': { + 'path_pattern1': [content_pattern1, content_pattern2], + 'path_pattern2': [content_pattern1, content_pattern3], + ... + }, + 'path_patterns': [ + { + 'name': path_pattern1', + 'pattern': , + 'inverted': True|False (defaults to False), + 'content_encoding': (defaults to utf8) + }, + ... + ], + 'content_patterns': [ { - 'required_matches': { - 'path_pattern1': [content_pattern1, content_pattern2], - 'path_pattern2': [content_pattern1, content_pattern3], - ... - }, - 'path_patterns': [ - { - 'name': path_pattern1', - 'pattern': , - 'inverted': True|False (defaults to False), - 'content_encoding': (defaults to utf8) - }, - ... - ], - 'content_patterns': [ - { - 'name': 'content_pattern1', - 'pattern': , - 'inverted': True|False (defaults to False) - } - ... - ] + 'name': 'content_pattern1', + 'pattern': , + 'inverted': True|False (defaults to False) } - - Meaning: if a file matches some path pattern, its content must match all - the corresponding content patterns. + ... + ] + } + ``` """ ) super().register_options(register) - register("--config", type=dict, fromfile=True, help=schema_help) + register( + "--config", + type=dict, + fromfile=True, + help=( + f"Config schema is as follows:\n\n{schema_help}\n\n" + "Meaning: if a file matches some path pattern, its content must match all the " + "corresponding content patterns.\n\n" + "It's often helpful to load this config from a JSON or YAML file. To do that, set " + "`[regex-lint].config = '@path/to/config.yaml'`, for example." + ), + ) register( "--detail-level", type=DetailLevel, @@ -303,15 +321,27 @@ def get_applicable_content_pattern_names(self, path): return applicable_content_pattern_names, content_encoding -# TODO: Consider switching this to `lint`. The main downside is that we would no longer be able to -# run on files with no owning targets, such as running on BUILD files. @goal_rule -async def validate( +async def validate_goal( console: Console, specs_snapshot: SpecsSnapshot, validate_subsystem: ValidateSubsystem, regex_lint_subsystem: RegexLintSubsystem, ) -> Validate: + warn_or_error( + "2.11.0.dev0", + "the `validate` goal", + ( + "The `regex-lint` check run by the `validate` goal is now run as part of the " + "`lint` goal. So long as you have set up `[regex-lint].config` " + "(or the deprecated `[sourcefile-validation].config`), this checker will run.\n\n" + "Note that if you were running `validate '**'` in order to check files without " + "owning targets (e.g. to check BUILD file contents), then you will need to run " + "`lint '**'` rather than `lint ::`. Also, `--changed-since=` will not cause " + "`regex-lint` to run, same as it how `validate` worked. We are exploring how to remove " + "both these gotchas." + ), + ) multi_matcher = regex_lint_subsystem.get_multi_matcher() if multi_matcher is None: logger.error( @@ -365,5 +395,62 @@ async def validate( return Validate(exit_code) +class RegexLintRequest(LintFilesRequest): + name = "regex-lint" + + +@rule(desc="Lint with regex patterns", level=LogLevel.DEBUG) +async def lint_with_regex_patterns( + request: RegexLintRequest, + validate_subsystem: ValidateSubsystem, + regex_lint_subsystem: RegexLintSubsystem, +) -> LintResults: + multi_matcher = regex_lint_subsystem.get_multi_matcher() + if multi_matcher is None: + return LintResults((), linter_name=request.name) + + digest_contents = await Get(DigestContents, PathGlobs(request.file_paths)) + regex_match_results = RegexMatchResults( + multi_matcher.check_source_file(file_content.path, file_content.content) + for file_content in sorted(digest_contents, key=lambda fc: fc.path) + ) + + stdout = "" + detail_level = regex_lint_subsystem.detail_level(validate_subsystem) + num_matched_all = 0 + num_nonmatched_some = 0 + for rmr in regex_match_results: + if not rmr.matching and not rmr.nonmatching: + continue + if detail_level == DetailLevel.names: + if rmr.nonmatching: + stdout += f"{rmr.path}\n" + continue + + if rmr.nonmatching: + icon = "X" + num_nonmatched_some += 1 + else: + icon = "V" + num_matched_all += 1 + matched_msg = " Matched: {}".format(",".join(rmr.matching)) if rmr.matching else "" + nonmatched_msg = ( + " Didn't match: {}".format(",".join(rmr.nonmatching)) if rmr.nonmatching else "" + ) + if detail_level == DetailLevel.all or ( + detail_level == DetailLevel.nonmatching and nonmatched_msg + ): + stdout += f"{icon} {rmr.path}:{matched_msg}{nonmatched_msg}\n" + + if detail_level not in (DetailLevel.none, DetailLevel.names): + if stdout: + stdout += "\n" + stdout += f"{num_matched_all} files matched all required patterns.\n" + stdout += f"{num_nonmatched_some} files failed to match at least one required pattern." + + exit_code = PANTS_FAILED_EXIT_CODE if num_nonmatched_some else PANTS_SUCCEEDED_EXIT_CODE + return LintResults((LintResult(exit_code, stdout, ""),), linter_name=request.name) + + def rules(): - return collect_rules() + return (*collect_rules(), UnionRule(LintFilesRequest, RegexLintRequest)) diff --git a/src/python/pants/core/goals/lint.py b/src/python/pants/core/goals/lint.py index 0c7255af0ed..f8ea86c07df 100644 --- a/src/python/pants/core/goals/lint.py +++ b/src/python/pants/core/goals/lint.py @@ -6,13 +6,13 @@ import itertools import logging from dataclasses import dataclass -from typing import Any, Iterable, cast +from typing import Any, ClassVar, Iterable, cast from pants.core.goals.style_request import StyleRequest, style_batch_size_help, write_reports from pants.core.util_rules.distdir import DistDir from pants.engine.console import Console -from pants.engine.engine_aware import EngineAwareReturnType -from pants.engine.fs import EMPTY_DIGEST, Digest, Workspace +from pants.engine.engine_aware import EngineAwareParameter, EngineAwareReturnType +from pants.engine.fs import EMPTY_DIGEST, Digest, SpecsSnapshot, Workspace from pants.engine.goal import Goal, GoalSubsystem from pants.engine.process import FallibleProcessResult from pants.engine.rules import Get, MultiGet, collect_rules, goal_rule @@ -134,6 +134,19 @@ class LintRequest(StyleRequest): """ +@union +@dataclass(frozen=True) +class LintFilesRequest(EngineAwareParameter): + """The entry point for linters that do not use targets.""" + + name: ClassVar[str] + + file_paths: tuple[str, ...] + + def debug_hint(self) -> str: + return self.name + + # If a user wants linter reports to show up in dist/ they must ensure that the reports # are written under this directory. E.g., # ./pants --flake8-args="--output-file=reports/report.txt" lint @@ -199,43 +212,63 @@ async def lint( console: Console, workspace: Workspace, targets: Targets, + specs_snapshot: SpecsSnapshot, lint_subsystem: LintSubsystem, union_membership: UnionMembership, dist_dir: DistDir, ) -> Lint: - request_types = cast("Iterable[type[LintRequest]]", union_membership[LintRequest]) - requests = tuple( + target_request_types = cast("Iterable[type[LintRequest]]", union_membership[LintRequest]) + target_requests = tuple( request_type( request_type.field_set_type.create(target) for target in targets if request_type.field_set_type.is_applicable(target) ) - for request_type in request_types + for request_type in target_request_types + ) + file_requests = tuple( + request_type(specs_snapshot.snapshot.files) + for request_type in union_membership[LintFilesRequest] ) if lint_subsystem.per_file_caching: - all_batch_results = await MultiGet( - Get(LintResults, LintRequest, request.__class__([field_set])) - for request in requests - if request.field_sets - for field_set in request.field_sets - ) + all_requests = [ + *( + Get(LintResults, LintRequest, request.__class__([field_set])) + for request in target_requests + if request.field_sets + for field_set in request.field_sets + ), + *( + Get(LintResults, LintFilesRequest, request.__class__((fp,))) + for request in file_requests + for fp in request.file_paths + ), + ] else: def address_str(fs: FieldSet) -> str: return fs.address.spec - all_batch_results = await MultiGet( - Get(LintResults, LintRequest, request.__class__(field_set_batch)) - for request in requests - if request.field_sets - for field_set_batch in partition_sequentially( - request.field_sets, - key=address_str, - size_target=lint_subsystem.batch_size, - size_max=4 * lint_subsystem.batch_size, - ) - ) + all_requests = [ + *( + Get(LintResults, LintRequest, request.__class__(field_set_batch)) + for request in target_requests + if request.field_sets + for field_set_batch in partition_sequentially( + request.field_sets, + key=address_str, + size_target=lint_subsystem.batch_size, + size_max=4 * lint_subsystem.batch_size, + ) + ), + *(Get(LintResults, LintFilesRequest, request) for request in file_requests), + ] + + all_batch_results = cast( + "tuple[LintResults, ...]", + await MultiGet(all_requests), # type: ignore[arg-type] + ) def key_fn(results: LintResults): return results.linter_name diff --git a/src/python/pants/core/goals/lint_test.py b/src/python/pants/core/goals/lint_test.py index f21dd6caae3..f180f444352 100644 --- a/src/python/pants/core/goals/lint_test.py +++ b/src/python/pants/core/goals/lint_test.py @@ -8,10 +8,18 @@ import pytest -from pants.core.goals.lint import Lint, LintRequest, LintResult, LintResults, LintSubsystem, lint +from pants.core.goals.lint import ( + Lint, + LintFilesRequest, + LintRequest, + LintResult, + LintResults, + LintSubsystem, + lint, +) from pants.core.util_rules.distdir import DistDir from pants.engine.addresses import Address -from pants.engine.fs import Workspace +from pants.engine.fs import SpecsSnapshot, Workspace from pants.engine.target import FieldSet, MultipleSourcesField, Target, Targets from pants.engine.unions import UnionMembership from pants.testutil.option_util import create_goal_subsystem @@ -97,6 +105,14 @@ def exit_code(_: Iterable[Address]) -> int: return -1 +class MockFilesRequest(LintFilesRequest): + name = "FilesLinter" + + @property + def lint_results(self) -> LintResults: + return LintResults([LintResult(0, "", "")], linter_name=self.name) + + @pytest.fixture def rule_runner() -> RuleRunner: return RuleRunner() @@ -111,22 +127,31 @@ def run_lint_rule( *, lint_request_types: List[Type[LintRequest]], targets: List[Target], + run_files_linter: bool = False, per_file_caching: bool = False, batch_size: int = 128, ) -> Tuple[int, str]: + union_membership = UnionMembership( + { + LintRequest: lint_request_types, + LintFilesRequest: [MockFilesRequest] if run_files_linter else [], + } + ) + lint_subsystem = create_goal_subsystem( + LintSubsystem, + per_file_caching=per_file_caching, + batch_size=batch_size, + ) + specs_snapshot = SpecsSnapshot(rule_runner.make_snapshot_of_empty_files(["f.txt"])) with mock_console(rule_runner.options_bootstrapper) as (console, stdio_reader): - union_membership = UnionMembership({LintRequest: lint_request_types}) result: Lint = run_rule_with_mocks( lint, rule_args=[ console, Workspace(rule_runner.scheduler, _enforce_effects=False), Targets(targets), - create_goal_subsystem( - LintSubsystem, - per_file_caching=per_file_caching, - batch_size=batch_size, - ), + specs_snapshot, + lint_subsystem, union_membership, DistDir(relpath=Path("dist")), ], @@ -134,8 +159,13 @@ def run_lint_rule( MockGet( output_type=LintResults, input_type=LintRequest, - mock=lambda field_set_collection: field_set_collection.lint_results, - ) + mock=lambda mock_request: mock_request.lint_results, + ), + MockGet( + output_type=LintResults, + input_type=LintFilesRequest, + mock=lambda mock_request: mock_request.lint_results, + ), ], union_membership=union_membership, ) @@ -176,6 +206,7 @@ def test_summary(rule_runner: RuleRunner, per_file_caching: bool) -> None: ], targets=[make_target(good_address), make_target(bad_address)], per_file_caching=per_file_caching, + run_files_linter=True, ) assert exit_code == FailingRequest.exit_code([bad_address]) assert stderr == dedent( @@ -183,6 +214,7 @@ def test_summary(rule_runner: RuleRunner, per_file_caching: bool) -> None: 𐄂 ConditionallySucceedsLinter failed. 𐄂 FailingLinter failed. + ✓ FilesLinter succeeded. ✓ SuccessfulLinter succeeded. """ ) diff --git a/src/python/pants/help/help_integration_test.py b/src/python/pants/help/help_integration_test.py index b5b686ace34..9f55f2ce1c4 100644 --- a/src/python/pants/help/help_integration_test.py +++ b/src/python/pants/help/help_integration_test.py @@ -92,7 +92,7 @@ def test_help_goals() -> None: def test_help_goals_only_show_implemented() -> None: # Some core goals, such as `./pants test`, require downstream implementations to work # properly. We should only show those goals when an implementation is provided. - goals_that_need_implementation = ["binary", "fmt", "lint", "run", "test"] + goals_that_need_implementation = ["fmt", "test"] command = ["--pants-config-files=[]", "help", "goals"] not_implemented_run = run_pants(["--backend-packages=[]", *command])