Skip to content

Commit

Permalink
Do not scan uncommentable files [fix]
Browse files Browse the repository at this point in the history
This prevents incidents of e.g. falsely detected copyright info
in e.g. binary files, if they contain the word "Copyright" as plain-text.
This happened to me in the case of some JPG files, for example.

This also saves us from reading binary file contents unnecessarily.
  • Loading branch information
hoijui authored and carmenbianca committed Oct 24, 2023
1 parent 62b42b6 commit ead44ea
Showing 1 changed file with 40 additions and 31 deletions.
71 changes: 40 additions & 31 deletions src/reuse/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
_contains_snippet,
_copyright_from_dep5,
_determine_license_path,
_is_commentable,
decoded_text_from_binary,
extract_reuse_info,
)
Expand Down Expand Up @@ -184,40 +185,48 @@ def reuse_info_of(self, path: StrPath) -> List[ReuseInfo]:
_("'{path}' covered by .reuse/dep5").format(path=path)
)

# Search the file for REUSE information.
with path.open("rb") as fp:
try:
# Completely read the file once to search for possible snippets
if _contains_snippet(fp):
_LOGGER.debug(f"'{path}' seems to contain a SPDX Snippet")
read_limit = None
else:
read_limit = _HEADER_BYTES
# Reset read position
fp.seek(0)
# Scan the file for REUSE info, possible limiting the read
# length
file_result = extract_reuse_info(
decoded_text_from_binary(fp, size=read_limit)
)
if file_result.contains_copyright_or_licensing():
if path.suffix == ".license":
source_type = SourceType.DOT_LICENSE
if path != Path(original_path) or _is_commentable(path):
# Search the file for REUSE information.
with path.open("rb") as fp:
try:
# Completely read the file once
# to search for possible snippets
if _contains_snippet(fp):
_LOGGER.debug(
f"'{path}' seems to contain an SPDX Snippet"
)
read_limit = None
else:
source_type = SourceType.FILE_HEADER
file_result = file_result.copy(
path=self.relative_from_root(original_path).as_posix(),
source_path=self.relative_from_root(path).as_posix(),
source_type=source_type,
read_limit = _HEADER_BYTES
# Reset read position
fp.seek(0)
# Scan the file for REUSE info, possibly limiting the read
# length
file_result = extract_reuse_info(
decoded_text_from_binary(fp, size=read_limit)
)
if file_result.contains_copyright_or_licensing():
if path.suffix == ".license":
source_type = SourceType.DOT_LICENSE
else:
source_type = SourceType.FILE_HEADER
file_result = file_result.copy(
path=self.relative_from_root(
original_path
).as_posix(),
source_path=self.relative_from_root(
path
).as_posix(),
source_type=source_type,
)

except (ExpressionError, ParseError):
_LOGGER.error(
_(
"'{path}' holds an SPDX expression that cannot be"
" parsed, skipping the file"
).format(path=path)
)
except (ExpressionError, ParseError):
_LOGGER.error(
_(
"'{path}' holds an SPDX expression that cannot be"
" parsed, skipping the file"
).format(path=path)
)

# There is both information in a .dep5 file and in the file header
if dep5_result.contains_info() and file_result.contains_info():
Expand Down

0 comments on commit ead44ea

Please sign in to comment.