Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge validate goal with lint goal #14102

Merged
merged 5 commits into from
Jan 29, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions .github/workflows/test-cron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -340,11 +340,9 @@ jobs:

'
- name: Lint
run: './pants validate ''**''
run: './pants update-build-files --check

./pants update-build-files --check

./pants lint check ::
./pants lint check ''**''

'
- if: always()
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -515,11 +515,9 @@ jobs:

'
- name: Lint
run: './pants validate ''**''
run: './pants update-build-files --check

./pants update-build-files --check

./pants lint check ::
./pants lint check ''**''

'
- if: always()
Expand Down
4 changes: 2 additions & 2 deletions build-support/bin/generate_github_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,9 +380,9 @@ def test_workflow_jobs(python_versions: list[str], *, cron: bool) -> Jobs:
{
"name": "Lint",
"run": (
"./pants validate '**'\n"
"./pants update-build-files --check\n"
"./pants lint check ::\n"
# Note: we use `**` rather than `::` because regex-lint.
"./pants lint check '**'\n"
),
},
upload_log_artifacts(name="lint"),
Expand Down
161 changes: 124 additions & 37 deletions src/python/pants/backend/project_info/regex_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,18 @@
from enum import Enum
from typing import Any, cast

from pants.base.deprecated import resolve_conflicting_options
from pants.base.deprecated import resolve_conflicting_options, warn_or_error
from pants.base.exiter import PANTS_FAILED_EXIT_CODE, PANTS_SUCCEEDED_EXIT_CODE
from pants.core.goals.lint import LintFilesRequest, LintResult, LintResults
from pants.engine.collection import Collection
from pants.engine.console import Console
from pants.engine.fs import Digest, DigestContents, SpecsSnapshot
from pants.engine.fs import Digest, DigestContents, PathGlobs, SpecsSnapshot
from pants.engine.goal import Goal, GoalSubsystem
from pants.engine.rules import Get, collect_rules, goal_rule
from pants.engine.rules import Get, collect_rules, goal_rule, rule
from pants.engine.unions import UnionRule
from pants.option.subsystem import Subsystem
from pants.util.frozendict import FrozenDict
from pants.util.logging import LogLevel
from pants.util.memo import memoized_method

logger = logging.getLogger(__name__)
Expand All @@ -43,7 +46,7 @@ class DetailLevel(Enum):

class ValidateSubsystem(GoalSubsystem):
name = "validate"
help = "Validate sources against regexes."
help = "Deprecated: to use, set `[regex-lint].config` and run with the `lint` goal"

@classmethod
def register_options(cls, register):
Expand Down Expand Up @@ -94,7 +97,14 @@ def from_dict(cls, d: dict[str, Any]) -> ValidationConfig:

class RegexLintSubsystem(Subsystem):
options_scope = "regex-lint"
help = "Lint your code using regex patterns, e.g. to check for copyright headers."
help = (
"Lint your code using regex patterns, e.g. to check for copyright headers.\n\n"
"To activate this with the `lint` goal, you must set `[regex-lint].config`.\n\n"
"Unlike other linters, this can run on files not owned by targets, such as BUILD files. To "
"run on those, use `lint '**'` rather than `lint ::`, for example. Unfortunately, "
"`--changed-since=<sha>` does not yet cause this linter to run. We are exploring how to "
"improve both these gotchas."
)

deprecated_options_scope = "sourcefile-validation"
deprecated_options_scope_removal_version = "2.11.0.dev0"
Expand All @@ -103,39 +113,47 @@ class RegexLintSubsystem(Subsystem):
def register_options(cls, register):
schema_help = textwrap.dedent(
"""\
Config schema is as follows:

```
{
'required_matches': {
'path_pattern1': [content_pattern1, content_pattern2],
'path_pattern2': [content_pattern1, content_pattern3],
...
},
'path_patterns': [
{
'name': path_pattern1',
'pattern': <path regex pattern>,
'inverted': True|False (defaults to False),
'content_encoding': <encoding> (defaults to utf8)
},
...
],
'content_patterns': [
{
'required_matches': {
'path_pattern1': [content_pattern1, content_pattern2],
'path_pattern2': [content_pattern1, content_pattern3],
...
},
'path_patterns': [
{
'name': path_pattern1',
'pattern': <path regex pattern>,
'inverted': True|False (defaults to False),
'content_encoding': <encoding> (defaults to utf8)
},
...
],
'content_patterns': [
{
'name': 'content_pattern1',
'pattern': <content regex pattern>,
'inverted': True|False (defaults to False)
}
...
]
'name': 'content_pattern1',
'pattern': <content regex pattern>,
'inverted': True|False (defaults to False)
}

Meaning: if a file matches some path pattern, its content must match all
the corresponding content patterns.
...
]
}
```
"""
)
super().register_options(register)
register("--config", type=dict, fromfile=True, help=schema_help)
register(
"--config",
type=dict,
fromfile=True,
help=(
f"Config schema is as follows:\n\n{schema_help}\n\n"
"Meaning: if a file matches some path pattern, its content must match all the "
"corresponding content patterns.\n\n"
"It's often helpful to load this config from a JSON or YAML file. To do that, set "
"`[regex-lint].config = '@path/to/config.yaml'`, for example."
),
)
register(
"--detail-level",
type=DetailLevel,
Expand Down Expand Up @@ -303,15 +321,27 @@ def get_applicable_content_pattern_names(self, path):
return applicable_content_pattern_names, content_encoding


# TODO: Consider switching this to `lint`. The main downside is that we would no longer be able to
# run on files with no owning targets, such as running on BUILD files.
@goal_rule
async def validate(
async def validate_goal(
console: Console,
specs_snapshot: SpecsSnapshot,
validate_subsystem: ValidateSubsystem,
regex_lint_subsystem: RegexLintSubsystem,
) -> Validate:
warn_or_error(
"2.11.0.dev0",
"the `validate` goal",
(
"The `regex-lint` check run by the `validate` goal is now run as part of the "
"`lint` goal. So long as you have set up `[regex-lint].config` "
"(or the deprecated `[sourcefile-validation].config`), this checker will run.\n\n"
"Note that if you were running `validate '**'` in order to check files without "
"owning targets (e.g. to check BUILD file contents), then you will need to run "
"`lint '**'` rather than `lint ::`. Also, `--changed-since=<sha>` will not cause "
"`regex-lint` to run, same as it how `validate` worked. We are exploring how to remove "
"both these gotchas."
Comment on lines +335 to +342
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth referring to a ticket in the help as part of the explanation of the gotchas? Improving the --changed behavior is possibly related to #13757 (i.e., the need to refactor Specs calculation to move it into a single call to the engine).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the most relevant thing is my design doc from November. Should I reference that? Fwit, that remains one of the most pressing projects I personally want to work on.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's fine as is. We've always had these gotchas since adding validate in 2020.

),
)
multi_matcher = regex_lint_subsystem.get_multi_matcher()
if multi_matcher is None:
logger.error(
Expand Down Expand Up @@ -365,5 +395,62 @@ async def validate(
return Validate(exit_code)


class RegexLintRequest(LintFilesRequest):
name = "regex-lint"


@rule(desc="Lint with regex patterns", level=LogLevel.DEBUG)
async def lint_with_regex_patterns(
request: RegexLintRequest,
validate_subsystem: ValidateSubsystem,
regex_lint_subsystem: RegexLintSubsystem,
) -> LintResults:
multi_matcher = regex_lint_subsystem.get_multi_matcher()
if multi_matcher is None:
return LintResults((), linter_name=request.name)

digest_contents = await Get(DigestContents, PathGlobs(request.file_paths))
regex_match_results = RegexMatchResults(
multi_matcher.check_source_file(file_content.path, file_content.content)
for file_content in sorted(digest_contents, key=lambda fc: fc.path)
)

stdout = ""
detail_level = regex_lint_subsystem.detail_level(validate_subsystem)
num_matched_all = 0
num_nonmatched_some = 0
for rmr in regex_match_results:
if not rmr.matching and not rmr.nonmatching:
continue
if detail_level == DetailLevel.names:
if rmr.nonmatching:
stdout += f"{rmr.path}\n"
continue

if rmr.nonmatching:
icon = "X"
num_nonmatched_some += 1
else:
icon = "V"
num_matched_all += 1
matched_msg = " Matched: {}".format(",".join(rmr.matching)) if rmr.matching else ""
nonmatched_msg = (
" Didn't match: {}".format(",".join(rmr.nonmatching)) if rmr.nonmatching else ""
)
if detail_level == DetailLevel.all or (
detail_level == DetailLevel.nonmatching and nonmatched_msg
):
stdout += f"{icon} {rmr.path}:{matched_msg}{nonmatched_msg}\n"

if detail_level not in (DetailLevel.none, DetailLevel.names):
if stdout:
stdout += "\n"
stdout += f"{num_matched_all} files matched all required patterns.\n"
stdout += f"{num_nonmatched_some} files failed to match at least one required pattern."

exit_code = PANTS_FAILED_EXIT_CODE if num_nonmatched_some else PANTS_SUCCEEDED_EXIT_CODE
return LintResults((LintResult(exit_code, stdout, ""),), linter_name=request.name)


def rules():
return collect_rules()
return (*collect_rules(), UnionRule(LintFilesRequest, RegexLintRequest))
79 changes: 56 additions & 23 deletions src/python/pants/core/goals/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
import itertools
import logging
from dataclasses import dataclass
from typing import Any, Iterable, cast
from typing import Any, ClassVar, Iterable, cast

from pants.core.goals.style_request import StyleRequest, style_batch_size_help, write_reports
from pants.core.util_rules.distdir import DistDir
from pants.engine.console import Console
from pants.engine.engine_aware import EngineAwareReturnType
from pants.engine.fs import EMPTY_DIGEST, Digest, Workspace
from pants.engine.engine_aware import EngineAwareParameter, EngineAwareReturnType
from pants.engine.fs import EMPTY_DIGEST, Digest, SpecsSnapshot, Workspace
from pants.engine.goal import Goal, GoalSubsystem
from pants.engine.process import FallibleProcessResult
from pants.engine.rules import Get, MultiGet, collect_rules, goal_rule
Expand Down Expand Up @@ -134,6 +134,19 @@ class LintRequest(StyleRequest):
"""


@union
@dataclass(frozen=True)
class LintFilesRequest(EngineAwareParameter):
"""The entry point for linters that do not use targets."""

name: ClassVar[str]

file_paths: tuple[str, ...]

def debug_hint(self) -> str:
return self.name


# If a user wants linter reports to show up in dist/ they must ensure that the reports
# are written under this directory. E.g.,
# ./pants --flake8-args="--output-file=reports/report.txt" lint <target>
Expand Down Expand Up @@ -199,43 +212,63 @@ async def lint(
console: Console,
workspace: Workspace,
targets: Targets,
specs_snapshot: SpecsSnapshot,
lint_subsystem: LintSubsystem,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commenting in the wrong place, but: is it possible to update required_union_implementations for this case? Seems like it would need both AND and OR perhaps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has been a problem for generate-lockfiles, too. We want to OR whereas it's implemented as AND.

union_membership: UnionMembership,
dist_dir: DistDir,
) -> Lint:
request_types = cast("Iterable[type[LintRequest]]", union_membership[LintRequest])
requests = tuple(
target_request_types = cast("Iterable[type[LintRequest]]", union_membership[LintRequest])
target_requests = tuple(
request_type(
request_type.field_set_type.create(target)
for target in targets
if request_type.field_set_type.is_applicable(target)
)
for request_type in request_types
for request_type in target_request_types
)
file_requests = tuple(
request_type(specs_snapshot.snapshot.files)
for request_type in union_membership[LintFilesRequest]
)

if lint_subsystem.per_file_caching:
all_batch_results = await MultiGet(
Get(LintResults, LintRequest, request.__class__([field_set]))
for request in requests
if request.field_sets
for field_set in request.field_sets
)
all_requests = [
*(
Get(LintResults, LintRequest, request.__class__([field_set]))
for request in target_requests
if request.field_sets
for field_set in request.field_sets
),
*(
Get(LintResults, LintFilesRequest, request.__class__((fp,)))
for request in file_requests
for fp in request.file_paths
),
]
else:

def address_str(fs: FieldSet) -> str:
return fs.address.spec

all_batch_results = await MultiGet(
Get(LintResults, LintRequest, request.__class__(field_set_batch))
for request in requests
if request.field_sets
for field_set_batch in partition_sequentially(
request.field_sets,
key=address_str,
size_target=lint_subsystem.batch_size,
size_max=4 * lint_subsystem.batch_size,
)
)
all_requests = [
*(
Get(LintResults, LintRequest, request.__class__(field_set_batch))
for request in target_requests
if request.field_sets
for field_set_batch in partition_sequentially(
request.field_sets,
key=address_str,
size_target=lint_subsystem.batch_size,
size_max=4 * lint_subsystem.batch_size,
)
),
*(Get(LintResults, LintFilesRequest, request) for request in file_requests),
]

all_batch_results = cast(
"tuple[LintResults, ...]",
await MultiGet(all_requests), # type: ignore[arg-type]
)

def key_fn(results: LintResults):
return results.linter_name
Expand Down
Loading