Skip to content

Commit

Permalink
Merge pull request #825 from hoijui/main
Browse files Browse the repository at this point in the history
Do not scan uncommentable files
  • Loading branch information
carmenbianca authored Oct 24, 2023
2 parents ecd09f3 + fc7ec3d commit a8c56c4
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 62 deletions.
4 changes: 3 additions & 1 deletion AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ Contributors

- Shun Sakai

- Dirk Brömmel
- Dirk Brömmel

- Robin Vobruba

Translators
-----------
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ CLI command and its behaviour. There are no guarantees of stability for the

- Alpine Docker image now uses 3.18 as base. (#846)

- No longer scan binary or uncommentable files for their contents in search of
REUSE information. (#825)

### Deprecated

### Removed
Expand Down
43 changes: 41 additions & 2 deletions src/reuse/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,34 @@
from itertools import chain
from os import PathLike
from pathlib import Path, PurePath
from typing import IO, Any, BinaryIO, Dict, Iterator, List, Optional, Set, Union
from typing import (
IO,
Any,
BinaryIO,
Dict,
Iterator,
List,
Optional,
Set,
Type,
Union,
cast,
)

from binaryornot.check import is_binary
from boolean.boolean import Expression, ParseError
from debian.copyright import Copyright
from license_expression import ExpressionError, Licensing

from . import ReuseInfo, SourceType
from ._licenses import ALL_NON_DEPRECATED_MAP
from .comment import _all_style_classes
from .comment import (
EXTENSION_COMMENT_STYLE_MAP_LOWERCASE,
FILENAME_COMMENT_STYLE_MAP_LOWERCASE,
CommentStyle,
UncommentableCommentStyle,
_all_style_classes,
)

# TODO: When removing Python 3.8 support, use PathLike[str]
StrPath = Union[str, PathLike]
Expand Down Expand Up @@ -265,6 +284,26 @@ def _contains_snippet(binary_file: BinaryIO) -> bool:
return False


def _get_comment_style(path: StrPath) -> Optional[Type[CommentStyle]]:
    """Look up the comment style class registered for *path*.

    The lowercased file name is tried first against
    FILENAME_COMMENT_STYLE_MAP_LOWERCASE. Only when that lookup yields None
    is the lowercased suffix tried against
    EXTENSION_COMMENT_STYLE_MAP_LOWERCASE. None is returned when neither map
    has an entry.
    """
    target = Path(path)

    # A match on the complete file name takes precedence over the extension.
    by_name = FILENAME_COMMENT_STYLE_MAP_LOWERCASE.get(target.name.lower())
    if by_name is not None:
        return by_name

    by_suffix = EXTENSION_COMMENT_STYLE_MAP_LOWERCASE.get(
        target.suffix.lower()
    )
    return cast(Optional[Type[CommentStyle]], by_suffix)


def _is_commentable(path: Path) -> bool:
    """Return whether *path* is commentable.

    A path is not commentable when its detected comment style is
    UncommentableCommentStyle, or when is_binary reports the file as binary.
    """
    if _get_comment_style(path) == UncommentableCommentStyle:
        # Registered as uncommentable: the binary check is not run at all.
        return False
    return not is_binary(str(path))


def merge_copyright_lines(copyright_lines: Set[str]) -> Set[str]:
"""Parse all copyright lines and merge identical statements making years
into a range.
Expand Down
32 changes: 5 additions & 27 deletions src/reuse/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
cast,
)

from binaryornot.check import is_binary
from boolean.boolean import ParseError
from jinja2 import Environment, FileSystemLoader, PackageLoader, Template
from jinja2.exceptions import TemplateNotFound
Expand All @@ -49,6 +48,8 @@
StrPath,
_determine_license_path,
_determine_license_suffix_path,
_get_comment_style,
_is_commentable,
contains_reuse_info,
detect_line_endings,
extract_reuse_info,
Expand All @@ -57,15 +58,12 @@
spdx_identifier,
)
from .comment import (
EXTENSION_COMMENT_STYLE_MAP_LOWERCASE,
FILENAME_COMMENT_STYLE_MAP_LOWERCASE,
NAME_STYLE_MAP,
CommentCreateError,
CommentParseError,
CommentStyle,
EmptyCommentStyle,
PythonCommentStyle,
UncommentableCommentStyle,
)
from .project import Project

Expand Down Expand Up @@ -377,26 +375,6 @@ def add_new_header(
return new_text


def _get_comment_style(path: StrPath) -> Optional[Type[CommentStyle]]:
    """Return the comment style class detected for *path*, or None.

    Detection consults FILENAME_COMMENT_STYLE_MAP_LOWERCASE with the
    lowercased file name, then falls back to
    EXTENSION_COMMENT_STYLE_MAP_LOWERCASE with the lowercased suffix.
    """
    as_path = Path(path)
    found = FILENAME_COMMENT_STYLE_MAP_LOWERCASE.get(as_path.name.lower())
    if found is not None:
        return found
    # No entry for the file name; fall back to the extension map.
    return cast(
        Optional[Type[CommentStyle]],
        EXTENSION_COMMENT_STYLE_MAP_LOWERCASE.get(as_path.suffix.lower()),
    )


def _is_uncommentable(path: Path) -> bool:
    """Return whether *path* is uncommentable.

    This is the case when the detected comment style is
    UncommentableCommentStyle, or when is_binary reports the file as binary.
    """
    if _get_comment_style(path) == UncommentableCommentStyle:
        # Registered as uncommentable: no need to inspect the file itself.
        return True
    return is_binary(str(path))


def _verify_paths_line_handling(
paths: Iterable[Path],
parser: ArgumentParser,
Expand Down Expand Up @@ -433,10 +411,10 @@ def _verify_paths_comment_style(

for path in paths:
style = _get_comment_style(path)
not_uncommentable = not _is_uncommentable(path)
commentable = _is_commentable(path)

# TODO: This check is duplicated.
if style is None and not_uncommentable:
if style is None and commentable:
unrecognised_files.append(path)

if unrecognised_files:
Expand Down Expand Up @@ -813,7 +791,7 @@ def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int:

result = 0
for path in paths:
uncommentable = _is_uncommentable(path)
uncommentable = not _is_commentable(path)
if uncommentable or args.force_dot_license:
new_path = _determine_license_suffix_path(path)
if uncommentable:
Expand Down
79 changes: 47 additions & 32 deletions src/reuse/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
_contains_snippet,
_copyright_from_dep5,
_determine_license_path,
_is_commentable,
decoded_text_from_binary,
extract_reuse_info,
)
Expand Down Expand Up @@ -184,40 +185,54 @@ def reuse_info_of(self, path: StrPath) -> List[ReuseInfo]:
_("'{path}' covered by .reuse/dep5").format(path=path)
)

# Search the file for REUSE information.
with path.open("rb") as fp:
try:
# Completely read the file once to search for possible snippets
if _contains_snippet(fp):
_LOGGER.debug(f"'{path}' seems to contain a SPDX Snippet")
read_limit = None
else:
read_limit = _HEADER_BYTES
# Reset read position
fp.seek(0)
# Scan the file for REUSE info, possible limiting the read
# length
file_result = extract_reuse_info(
decoded_text_from_binary(fp, size=read_limit)
)
if file_result.contains_copyright_or_licensing():
if path.suffix == ".license":
source_type = SourceType.DOT_LICENSE
else:
source_type = SourceType.FILE_HEADER
file_result = file_result.copy(
path=self.relative_from_root(original_path).as_posix(),
source_path=self.relative_from_root(path).as_posix(),
source_type=source_type,
if not _is_commentable(path):
_LOGGER.info(
_(
"'{path}' was detected as a binary file or its extension is"
" marked as uncommentable; not searching its contents for"
" REUSE information."
).format(path=path)
)
else:
# Search the file for REUSE information.
with path.open("rb") as fp:
try:
read_limit: Optional[int] = _HEADER_BYTES
# Completely read the file once
# to search for possible snippets
if _contains_snippet(fp):
_LOGGER.debug(
f"'{path}' seems to contain an SPDX Snippet"
)
read_limit = None
# Reset read position
fp.seek(0)
# Scan the file for REUSE info, possibly limiting the read
# length
file_result = extract_reuse_info(
decoded_text_from_binary(fp, size=read_limit)
)
if file_result.contains_copyright_or_licensing():
source_type = SourceType.FILE_HEADER
if path.suffix == ".license":
source_type = SourceType.DOT_LICENSE
file_result = file_result.copy(
path=self.relative_from_root(
original_path
).as_posix(),
source_path=self.relative_from_root(
path
).as_posix(),
source_type=source_type,
)

except (ExpressionError, ParseError):
_LOGGER.error(
_(
"'{path}' holds an SPDX expression that cannot be"
" parsed, skipping the file"
).format(path=path)
)
except (ExpressionError, ParseError):
_LOGGER.error(
_(
"'{path}' holds an SPDX expression that cannot be"
" parsed, skipping the file"
).format(path=path)
)

# There is both information in a .dep5 file and in the file header
if dep5_result.contains_info() and file_result.contains_info():
Expand Down

0 comments on commit a8c56c4

Please sign in to comment.