From cb94206dd86e5904fe3a52564dbc7500f7012264 Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 11 Jan 2023 18:06:15 +0100 Subject: [PATCH 01/60] Rework `lint` Adding functions to retrieve and format the data from the `ProjectReport` class. --- src/reuse/lint.py | 187 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 185 insertions(+), 2 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 8ff9657b1..e2ed566d2 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -8,6 +8,7 @@ """ import contextlib +import json import os import sys from gettext import gettext as _ @@ -26,6 +27,13 @@ def _write_element(element, out=sys.stdout): def lint(report: ProjectReport, out=sys.stdout) -> bool: """Lint the entire project.""" + + # Collect data from report + # save into data structure (if report is not suitable) + + # Write output formatting functions (dynamic output formats) + # Write output writing functions (stdout[, file, webrequest, ...]) + bad_licenses_result = lint_bad_licenses(report, out) deprecated_result = lint_deprecated_licenses(report, out) extensionless = lint_licenses_without_extension(report, out) @@ -330,7 +338,178 @@ def add_arguments(parser): ) -def run(args, project: Project, out=sys.stdout): +def collect_data_from_report(report: ProjectReport) -> dict: + """Collects and formats data from report and returns it as a dictionary + + :param report: ProjectReport object + :return: Formatted dictionary containing data from the ProjectReport object + """ + # Setup report data container + data = { + "json_version": "1.0", + "reuse_version": __REUSE_version__, + "non_compliant": {}, + "files": {}, + "summary": { + "used_licenses": [], + }, + } + + # Populate 'non_compliant' + data["non_compliant"] = { + "missing_licenses": [str(f) for f in report.missing_licenses], + "unused_licenses": [str(f) for f in report.unused_licenses], + "deprecated_licenses": [str(f) for f in report.deprecated_licenses], + "bad_licenses": [str(f) for f in 
report.bad_licenses], + "licenses_without_extension": [ + str(f) for f in report.licenses_without_extension + ], + "missing_copyright_info": [ + str(f) for f in report.files_without_copyright + ], + "missing_licensing_info": [str(f) for f in report.missing_licenses], + "read_error": [str(f) for f in report.read_errors], + } + + # Populate 'files' + for file in report.file_reports: + copyrights = file.spdxfile.copyright.split("\n") + data["files"][str(file.path)] = { + "copyrights": [ + {"value": copyright, "source": file.spdxfile.name} + for copyright in copyrights + ], + "licenses": [ + {"value": license, "source": file.spdxfile.name} + for license in file.spdxfile.licenses_in_file + ], + } + + # Populate 'summary' + number_of_files = len(report.file_reports) + is_compliant = not any( + any(result) + for result in ( + data["non_compliant"]["missing_licenses"], + data["non_compliant"]["unused_licenses"], + data["non_compliant"]["bad_licenses"], + data["non_compliant"]["deprecated_licenses"], + data["non_compliant"]["licenses_without_extension"], + data["non_compliant"]["missing_copyright_info"], + data["non_compliant"]["missing_licensing_info"], + data["non_compliant"]["read_error"], + ) + ) + data["summary"] = { + "used_licenses": list(report.used_licenses), + "files_total": number_of_files, + "files_with_copyright_info": number_of_files + - len(report.files_without_copyright), + "files_with_licensing_info": number_of_files + - len(report.files_without_licenses), + "compliant": is_compliant, + } + return data + + +def format_json(data) -> str: + """Formats data dictionary as JSON string ready to be printed to std.out + + :param data: Dictionary containing formatted ProjectReport data + :return: String (representing JSON) that can be output to std.out + """ + return json.dumps(data, indent=2) + + +def format_plain(data) -> str: + """Formats data dictionary as plaintext string to be printed to std.out + + :param data: Dictionary containing formatted ProjectReport 
data + :return: String (in plaintext) that can be output to std.out + """ + output = "" + if not data["summary"]["compliant"]: + + output += "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + output += "\n\n" + files_without_copyright = set( + data["non_compliant"]["missing_copyright_info"] + ) + files_without_license = set( + data["non_compliant"]["missing_licensing_info"] + ) + files_without_both = files_without_license.intersection( + files_without_license + ) + + if files_without_both: + output += _( + "The following files have no copyright and licensing " + "information:" + ) + output += "\n" + for file in sorted(files_without_both): + output += f"* {file}\n" + + if files_without_copyright - files_without_both: + output += _("The following files have no copyright information:") + output += "\n" + for file in sorted(files_without_copyright - files_without_both): + output += f"* {file}\n" + + if files_without_license - files_without_both: + output += _("The following files have no licensing information:") + output += "\n" + for file in sorted(files_without_license - files_without_both): + output += f"* {file}\n" + + # bad licenses + # deprecated licenses + + output += "\n" + + output += "# " + _("SUMMARY") + output += "\n\n" + summary_contents = [ + (_("Bad licenses:"), ", ".join(data["non_compliant"]["bad_licenses"])), + ( + _("Deprecated licenses:"), + ", ".join(data["non_compliant"]["deprecated_licenses"]), + ), + ( + _("Licenses without file extension:"), + ", ".join(data["non_compliant"]["licenses_without_extension"]), + ), + ( + _("Missing licenses:"), + ", ".join(data["non_compliant"]["missing_licensing_info"]), + ), + ( + _("Unused licenses:"), + ", ".join(data["non_compliant"]["unused_licenses"]), + ), + (_("Used licenses:"), ", ".join(data["summary"]["used_licenses"])), + ] + + for key, value in summary_contents: + if not value: + value = "0" + output += "* " + key + ": " + value + "\n" + + return output + + +def output_data(data: dict, 
formatter, out=sys.stdout): + """Outputs data to stdout + + :param data: + :param formatter: + :param out: + """ + out.write(formatter(data)) + + +def run(args, project: Project): """List all non-compliant files.""" report = ProjectReport.generate( project, do_checksum=False, multiprocessing=not args.no_multiprocessing @@ -338,7 +517,11 @@ def run(args, project: Project, out=sys.stdout): with contextlib.ExitStack() as stack: if args.quiet: + # TODO Rework quiet flag out = stack.enter_context(open(os.devnull, "w", encoding="utf-8")) - result = lint(report, out=out) + # TODO Toggle JSON formatter via flag + data = collect_data_from_report(report) + output_data(data, format_json()) + result = data["summary"]["compliant"] return 0 if result else 1 From 3ae8af7e63b84051d5d195860604e7461a199d8c Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:08:00 +0100 Subject: [PATCH 02/60] Add `--json` argument --- src/reuse/lint.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index e2ed566d2..91661542b 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -336,6 +336,9 @@ def add_arguments(parser): parser.add_argument( "-q", "--quiet", action="store_true", help=_("prevents output") ) + parser.add_argument( + "-j", "--json", action="store_true", help=_("formats output as JSON") + ) def collect_data_from_report(report: ProjectReport) -> dict: @@ -509,7 +512,8 @@ def output_data(data: dict, formatter, out=sys.stdout): out.write(formatter(data)) -def run(args, project: Project): + +def run(args, project: Project, out=sys.stdout, formatter=format_plain): """List all non-compliant files.""" report = ProjectReport.generate( project, do_checksum=False, multiprocessing=not args.no_multiprocessing @@ -517,11 +521,13 @@ def run(args, project: Project): with contextlib.ExitStack() as stack: if args.quiet: - # TODO Rework quiet flag out = stack.enter_context(open(os.devnull, "w", encoding="utf-8")) - # 
TODO Toggle JSON formatter via flag + + if args.json: + formatter = format_json + data = collect_data_from_report(report) - output_data(data, format_json()) + lint(data, formatter=formatter, out=out) result = data["summary"]["compliant"] return 0 if result else 1 From 2e6805e7a17058b171882b3ab04a577f39fb876f Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:08:29 +0100 Subject: [PATCH 03/60] Slim down data dictionary creation --- src/reuse/lint.py | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 91661542b..499b7b20b 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -351,40 +351,34 @@ def collect_data_from_report(report: ProjectReport) -> dict: data = { "json_version": "1.0", "reuse_version": __REUSE_version__, - "non_compliant": {}, + "non_compliant": { + "missing_licenses": report.missing_licenses, + "unused_licenses": [str(f) for f in report.unused_licenses], + "deprecated_licenses": [str(f) for f in report.deprecated_licenses], + "bad_licenses": report.bad_licenses, + "licenses_without_extension": [ + str(f) for f in report.licenses_without_extension.values() + ], + "missing_copyright_info": [str(f) for f in report.files_without_copyright], + "missing_licensing_info": [str(f) for f in report.files_without_licenses], + "read_error": [str(f) for f in report.read_errors], + }, "files": {}, "summary": { "used_licenses": [], }, } - # Populate 'non_compliant' - data["non_compliant"] = { - "missing_licenses": [str(f) for f in report.missing_licenses], - "unused_licenses": [str(f) for f in report.unused_licenses], - "deprecated_licenses": [str(f) for f in report.deprecated_licenses], - "bad_licenses": [str(f) for f in report.bad_licenses], - "licenses_without_extension": [ - str(f) for f in report.licenses_without_extension - ], - "missing_copyright_info": [ - str(f) for f in report.files_without_copyright - ], - "missing_licensing_info": [str(f) 
for f in report.missing_licenses], - "read_error": [str(f) for f in report.read_errors], - } - # Populate 'files' for file in report.file_reports: copyrights = file.spdxfile.copyright.split("\n") data["files"][str(file.path)] = { "copyrights": [ - {"value": copyright, "source": file.spdxfile.name} - for copyright in copyrights + {"value": cop, "source": file.spdxfile.name} for cop in copyrights ], "licenses": [ - {"value": license, "source": file.spdxfile.name} - for license in file.spdxfile.licenses_in_file + {"value": lic, "source": file.spdxfile.name} + for lic in file.spdxfile.licenses_in_file ], } From 5a03e9eedc2db0cae184f105d09f73be20c6a980 Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:09:15 +0100 Subject: [PATCH 04/60] Rework plaintext formatter --- src/reuse/lint.py | 135 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 113 insertions(+), 22 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 499b7b20b..cbbde78dc 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -419,54 +419,107 @@ def format_json(data) -> str: def format_plain(data) -> str: - """Formats data dictionary as plaintext string to be printed to std.out + """Formats data dictionary as plaintext string to be printed to sys.stdout :param data: Dictionary containing formatted ProjectReport data - :return: String (in plaintext) that can be output to std.out + :return: String (in plaintext) that can be output to sys.stdout """ output = "" + + # If the project is not compliant: if not data["summary"]["compliant"]: - output += "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") - output += "\n\n" - files_without_copyright = set( - data["non_compliant"]["missing_copyright_info"] - ) - files_without_license = set( - data["non_compliant"]["missing_licensing_info"] - ) - files_without_both = files_without_license.intersection( - files_without_license - ) + # Missing copyright and licensing information + files_without_copyright = 
set(data["non_compliant"]["missing_copyright_info"]) + files_without_license = set(data["non_compliant"]["missing_licensing_info"]) + files_without_both = files_without_license.intersection(files_without_copyright) + header = "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n" if files_without_both: + output += header output += _( - "The following files have no copyright and licensing " - "information:" + "The following files have no copyright and licensing " "information:" ) output += "\n" for file in sorted(files_without_both): output += f"* {file}\n" + output += "\n\n" if files_without_copyright - files_without_both: + output += header output += _("The following files have no copyright information:") output += "\n" for file in sorted(files_without_copyright - files_without_both): output += f"* {file}\n" + output += "\n\n" if files_without_license - files_without_both: + output += header output += _("The following files have no licensing information:") output += "\n" for file in sorted(files_without_license - files_without_both): output += f"* {file}\n" + output += "\n\n" + + # Bad licenses + bad_licenses = data["non_compliant"]["bad_licenses"] + if bad_licenses: + output += "# " + _("BAD LICENSES") + "\n\n" + for lic, files in sorted(bad_licenses.items()): + output += f"'{lic}' found in:" + "\n" + for f in sorted(files): + output += f"* {f}\n" + output += "\n\n" + + # Deprecated licenses + deprecated_licenses = data["non_compliant"]["deprecated_licenses"] + if deprecated_licenses: + output += "# " + _("DEPRECATED LICENSES") + "\n\n" + output += _("The following licenses are deprecated by SPDX:") + "\n" + for lic in sorted(deprecated_licenses): + output += f"* {lic}\n" + output += "\n\n" + + # Licenses without extension + licenses_without_extension = data["non_compliant"]["licenses_without_extension"] + if licenses_without_extension: + output += "# " + _("LICENSES WITHOUT FILE EXTENSION") + "\n\n" + output += _("The following licenses have no file 
extension:") + "\n" + for path in sorted(licenses_without_extension): + output += f"* {str(path)}" + "\n" + output += "\n\n" + + # Missing licenses + missing_licenses = data["non_compliant"]["missing_licenses"] + if missing_licenses: + output += "# " + _("MISSING LICENSES") + "\n\n" + for lic, files in sorted(missing_licenses.items()): + output += f"'{lic}' found in:" + "\n" + for f in sorted(files): + output += f"* {f}\n" + output += "\n" - # bad licenses - # deprecated licenses - - output += "\n" + # Unused licenses + unused_licenses = data["non_compliant"]["unused_licenses"] + if unused_licenses: + output += "# " + _("UNUSED LICENSES") + "\n\n" + output += _("The following licenses are not used:") + "\n" + for lic in sorted(deprecated_licenses): + output += f"* {lic}\n" + output += "\n\n" + + # Read errors + read_errors = data["non_compliant"]["read_error"] + if read_errors: + output += "# " + _("READ ERRORS") + "\n\n" + output += _("Could not read:") + "\n" + for path in sorted(read_errors): + output += f"* {str(path)}" + "\n" + output += "\n\n" output += "# " + _("SUMMARY") output += "\n\n" + summary_contents = [ (_("Bad licenses:"), ", ".join(data["non_compliant"]["bad_licenses"])), ( @@ -475,23 +528,61 @@ def format_plain(data) -> str: ), ( _("Licenses without file extension:"), - ", ".join(data["non_compliant"]["licenses_without_extension"]), + ", ".join( + [ + lic.split("/")[1] + for lic in data["non_compliant"]["licenses_without_extension"] + ] + ), ), ( _("Missing licenses:"), - ", ".join(data["non_compliant"]["missing_licensing_info"]), + ", ".join(data["non_compliant"]["missing_licenses"]), ), ( _("Unused licenses:"), ", ".join(data["non_compliant"]["unused_licenses"]), ), (_("Used licenses:"), ", ".join(data["summary"]["used_licenses"])), + ( + _("Read errors: {count}").format( + count=len(data["non_compliant"]["read_error"]) + ), + "empty", + ), + ( + _("files with copyright information: {count} / {total}").format( + 
count=data["summary"]["files_with_copyright_info"], + total=data["summary"]["files_total"], + ), + "empty", + ), + ( + _("files with license information: {count} / {total}").format( + count=data["summary"]["files_with_licensing_info"], + total=data["summary"]["files_total"], + ), + "empty", + ), ] for key, value in summary_contents: if not value: value = "0" - output += "* " + key + ": " + value + "\n" + if value == "empty": + value = "" + output += "* " + key + " " + value + "\n" + + if data["summary"]["compliant"]: + output += _( + "Congratulations! Your project is compliant with version" + " {} of the REUSE Specification :-)" + ).format(__REUSE_version__) + else: + output += _( + "Unfortunately, your project is not compliant with version " + "{} of the REUSE Specification :-(" + ).format(__REUSE_version__) return output From 21d00266c54c62dc953b90bc8b6a48b39c0f09c5 Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:09:47 +0100 Subject: [PATCH 05/60] Move JSON formatter and re-add `lint` function --- src/reuse/lint.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index cbbde78dc..15880861c 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -409,15 +409,6 @@ def collect_data_from_report(report: ProjectReport) -> dict: return data -def format_json(data) -> str: - """Formats data dictionary as JSON string ready to be printed to std.out - - :param data: Dictionary containing formatted ProjectReport data - :return: String (representing JSON) that can be output to std.out - """ - return json.dumps(data, indent=2) - - def format_plain(data) -> str: """Formats data dictionary as plaintext string to be printed to sys.stdout @@ -587,15 +578,32 @@ def format_plain(data) -> str: return output -def output_data(data: dict, formatter, out=sys.stdout): - """Outputs data to stdout +def format_json(data) -> str: + """Formats data dictionary as JSON string ready to 
be printed to sys.stdout - :param data: - :param formatter: - :param out: + :param data: Dictionary containing formatted ProjectReport data + :return: String (representing JSON) that can be output to sys.stdout """ + + def set_default(obj): + if isinstance(obj, set): + return list(obj) + + return json.dumps(data, indent=2, default=set_default) + + +def lint(data: dict, formatter=format_plain, out=sys.stdout): + """Lints the entire project + + :param data: Dictionary holding formatted ProjectReport data + :param formatter: Callable that formats the data dictionary + :param out: Where to output + """ + out.write(formatter(data)) + result = data["summary"]["compliant"] + return result def run(args, project: Project, out=sys.stdout, formatter=format_plain): From 0b473f0861b383ea8ad3533cd4584046f6cdea81 Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:10:11 +0100 Subject: [PATCH 06/60] Remove old output generation functions --- src/reuse/lint.py | 312 ---------------------------------------------- 1 file changed, 312 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 15880861c..24327d1c5 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -19,318 +19,6 @@ from .report import ProjectReport -def _write_element(element, out=sys.stdout): - out.write("* ") - out.write(str(element)) - out.write("\n") - - -def lint(report: ProjectReport, out=sys.stdout) -> bool: - """Lint the entire project.""" - - # Collect data from report - # save into data structure (if report is not suitable) - - # Write output formatting functions (dynamic output formats) - # Write output writing functions (stdout[, file, webrequest, ...]) - - bad_licenses_result = lint_bad_licenses(report, out) - deprecated_result = lint_deprecated_licenses(report, out) - extensionless = lint_licenses_without_extension(report, out) - missing_licenses_result = lint_missing_licenses(report, out) - unused_licenses_result = lint_unused_licenses(report, out) - read_errors_result = 
lint_read_errors(report, out) - files_without_cali = lint_files_without_copyright_and_licensing(report, out) - - lint_summary(report, out=out) - - success = not any( - any(result) - for result in ( - bad_licenses_result, - deprecated_result, - extensionless, - missing_licenses_result, - unused_licenses_result, - read_errors_result, - files_without_cali, - ) - ) - - out.write("\n") - if success: - out.write( - _( - "Congratulations! Your project is compliant with version" - " {} of the REUSE Specification :-)" - ).format(__REUSE_version__) - ) - else: - out.write( - _( - "Unfortunately, your project is not compliant with version " - "{} of the REUSE Specification :-(" - ).format(__REUSE_version__) - ) - out.write("\n") - - return success - - -def lint_bad_licenses(report: ProjectReport, out=sys.stdout) -> Iterable[str]: - """Lint for bad licenses. Bad licenses are licenses that are not in the - SPDX License List or do not start with LicenseRef-. - """ - bad_files = [] - - if report.bad_licenses: - out.write("# ") - out.write(_("BAD LICENSES")) - out.write("\n") - for lic, files in sorted(report.bad_licenses.items()): - out.write("\n") - out.write(_("'{}' found in:").format(lic)) - out.write("\n") - for file_ in sorted(files): - bad_files.append(file_) - _write_element(file_, out=out) - out.write("\n\n") - - return bad_files - - -def lint_deprecated_licenses( - report: ProjectReport, out=sys.stdout -) -> Iterable[str]: - """Lint for deprecated licenses.""" - deprecated = [] - - if report.deprecated_licenses: - out.write("# ") - out.write(_("DEPRECATED LICENSES")) - out.write("\n\n") - out.write(_("The following licenses are deprecated by SPDX:")) - out.write("\n") - for lic in sorted(report.deprecated_licenses): - deprecated.append(lic) - _write_element(lic, out=out) - out.write("\n\n") - - return deprecated - - -def lint_licenses_without_extension( - report: ProjectReport, out=sys.stdout -) -> Iterable[str]: - """Lint for licenses without extensions.""" - 
extensionless = [] - - if report.licenses_without_extension: - out.write("# ") - out.write(_("LICENSES WITHOUT FILE EXTENSION")) - out.write("\n\n") - out.write(_("The following licenses have no file extension:")) - out.write("\n") - for __, path in sorted(report.licenses_without_extension.items()): - extensionless.append(path) - _write_element(path, out=out) - out.write("\n\n") - - return extensionless - - -def lint_missing_licenses( - report: ProjectReport, out=sys.stdout -) -> Iterable[str]: - """Lint for missing licenses. A license is missing when it is referenced - in a file, but cannot be found. - """ - bad_files = [] - - if report.missing_licenses: - out.write("# ") - out.write(_("MISSING LICENSES")) - out.write("\n") - - for lic, files in sorted(report.missing_licenses.items()): - out.write("\n") - out.write(_("'{}' found in:").format(lic)) - out.write("\n") - for file_ in sorted(files): - bad_files.append(file_) - _write_element(file_, out=out) - out.write("\n\n") - - return bad_files - - -def lint_unused_licenses( - report: ProjectReport, out=sys.stdout -) -> Iterable[str]: - """Lint for unused licenses.""" - unused_licenses = [] - - if report.unused_licenses: - out.write("# ") - out.write(_("UNUSED LICENSES")) - out.write("\n\n") - out.write(_("The following licenses are not used:")) - out.write("\n") - for lic in sorted(report.unused_licenses): - unused_licenses.append(lic) - _write_element(lic, out=out) - out.write("\n\n") - - return unused_licenses - - -def lint_read_errors(report: ProjectReport, out=sys.stdout) -> Iterable[str]: - """Lint for read errors.""" - bad_files = [] - - if report.read_errors: - out.write("# ") - out.write(_("READ ERRORS")) - out.write("\n\n") - out.write(_("Could not read:")) - out.write("\n") - for file_ in report.read_errors: - bad_files.append(file_) - _write_element(file_, out=out) - out.write("\n\n") - - return bad_files - - -def lint_files_without_copyright_and_licensing( - report: ProjectReport, out=sys.stdout -) -> 
Iterable[str]: - """Lint for files that do not have copyright or licensing information.""" - # TODO: The below three operations can probably be optimised. - both = set(report.files_without_copyright) & set( - report.files_without_licenses - ) - only_copyright = set(report.files_without_copyright) - both - only_licensing = set(report.files_without_licenses) - both - - if any((both, only_copyright, only_licensing)): - out.write("# ") - out.write(_("MISSING COPYRIGHT AND LICENSING INFORMATION")) - out.write("\n\n") - if both: - out.write( - _( - "The following files have no copyright and licensing " - "information:" - ) - ) - out.write("\n") - for file_ in sorted(both): - _write_element(file_, out=out) - out.write("\n") - if only_copyright: - out.write(_("The following files have no copyright information:")) - out.write("\n") - for file_ in sorted(only_copyright): - _write_element(file_, out=out) - out.write("\n") - if only_licensing: - out.write(_("The following files have no licensing information:")) - out.write("\n") - for file_ in sorted(only_licensing): - _write_element(file_, out=out) - out.write("\n") - out.write("\n") - - return both | only_copyright | only_licensing - - -def lint_summary(report: ProjectReport, out=sys.stdout) -> None: - """Print a summary for linting.""" - # pylint: disable=too-many-statements - out.write("# ") - out.write(_("SUMMARY")) - out.write("\n\n") - - file_total = len(report.file_reports) - - out.write("* ") - out.write(_("Bad licenses:")) - for i, lic in enumerate(sorted(report.bad_licenses)): - if i: - out.write(",") - out.write(" ") - out.write(lic) - out.write("\n") - - out.write("* ") - out.write(_("Deprecated licenses:")) - for i, lic in enumerate(sorted(report.deprecated_licenses)): - if i: - out.write(",") - out.write(" ") - out.write(lic) - out.write("\n") - - out.write("* ") - out.write(_("Licenses without file extension:")) - for i, lic in enumerate(sorted(report.licenses_without_extension)): - if i: - out.write(",") - 
out.write(" ") - out.write(lic) - out.write("\n") - - out.write("* ") - out.write(_("Missing licenses:")) - for i, lic in enumerate(sorted(report.missing_licenses)): - if i: - out.write(",") - out.write(" ") - out.write(lic) - out.write("\n") - - out.write("* ") - out.write(_("Unused licenses:")) - for i, lic in enumerate(sorted(report.unused_licenses)): - if i: - out.write(",") - out.write(" ") - out.write(lic) - out.write("\n") - - out.write("* ") - out.write(_("Used licenses:")) - for i, lic in enumerate(sorted(report.used_licenses)): - if i: - out.write(",") - out.write(" ") - out.write(lic) - out.write("\n") - - out.write("* ") - out.write(_("Read errors: {count}").format(count=len(report.read_errors))) - out.write("\n") - - out.write("* ") - out.write( - _("Files with copyright information: {count} / {total}").format( - count=file_total - len(report.files_without_copyright), - total=file_total, - ) - ) - out.write("\n") - - out.write("* ") - out.write( - _("Files with license information: {count} / {total}").format( - count=file_total - len(report.files_without_licenses), - total=file_total, - ) - ) - out.write("\n") - - def add_arguments(parser): """Add arguments to parser.""" parser.add_argument( From 74fd00ee9c9d21012ef45c3e0c4a50398eede4ee Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:16:44 +0100 Subject: [PATCH 07/60] Adapt tests --- tests/test_lint.py | 63 +++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index 99ec3932a..322c09ec5 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -5,20 +5,12 @@ """All tests for reuse.lint""" - import shutil import sys import pytest -from reuse.lint import ( - lint, - lint_bad_licenses, - lint_files_without_copyright_and_licensing, - lint_missing_licenses, - lint_read_errors, - lint_summary, -) +from reuse.lint import lint, collect_data_from_report from reuse.project import Project from 
reuse.report import ProjectReport @@ -40,7 +32,8 @@ def test_lint_simple(fake_repository): """Extremely simple test for lint.""" project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint(report) + data = collect_data_from_report(report) + result = lint(data) assert result @@ -48,7 +41,8 @@ def test_lint_git(git_repository): """Extremely simple test for lint with a git repository.""" project = Project(git_repository) report = ProjectReport.generate(project) - result = lint(report) + data = collect_data_from_report(report) + result = lint(data) assert result @@ -57,7 +51,8 @@ def test_lint_submodule(submodule_repository): project = Project(submodule_repository) (submodule_repository / "submodule/foo.c").write_text("foo") report = ProjectReport.generate(project) - result = lint(report) + data = collect_data_from_report(report) + result = lint(data) assert result @@ -66,7 +61,8 @@ def test_lint_submodule_included(submodule_repository): project = Project(submodule_repository, include_submodules=True) (submodule_repository / "submodule/foo.c").write_text("foo") report = ProjectReport.generate(project) - result = lint(report) + data = collect_data_from_report(report) + result = lint(data) assert not result @@ -74,7 +70,8 @@ def test_lint_empty_directory(empty_directory): """An empty directory is compliant.""" project = Project(empty_directory) report = ProjectReport.generate(project) - result = lint(report) + data = collect_data_from_report(report) + result = lint(data) assert result @@ -90,7 +87,8 @@ def test_lint_deprecated(fake_repository, stringio): project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint(report, out=stringio) + data = collect_data_from_report(report) + result = lint(data, out=stringio) assert not result assert "GPL-3.0" in stringio.getvalue() @@ -98,14 +96,13 @@ def test_lint_deprecated(fake_repository, stringio): def test_lint_bad_license(fake_repository, stringio): """A bad 
license is detected.""" - (fake_repository / "foo.py").write_text( - "SPDX-License-Identifier: bad-license" - ) + (fake_repository / "foo.py").write_text("SPDX-License-Identifier: bad-license") project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint_bad_licenses(report, out=stringio) + data = collect_data_from_report(report) + result = lint(data, out=stringio) - assert "foo.py" in str(list(result)[0]) + assert not result assert "foo.py" in stringio.getvalue() assert "bad-license" in stringio.getvalue() @@ -115,9 +112,10 @@ def test_lint_missing_licenses(fake_repository, stringio): (fake_repository / "foo.py").write_text("SPDX-License-Identifier: MIT") project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint_missing_licenses(report, out=stringio) + data = collect_data_from_report(report) + result = lint(data, out=stringio) - assert "foo.py" in str(list(result)[0]) + assert not result assert "foo.py" in stringio.getvalue() assert "MIT" in stringio.getvalue() @@ -127,9 +125,11 @@ def test_lint_unused_licenses(fake_repository, stringio): (fake_repository / "LICENSES/MIT.txt").write_text("foo") project = Project(fake_repository) report = ProjectReport.generate(project) - lint_summary(report, out=stringio) + data = collect_data_from_report(report) + result = lint(data, out=stringio) - assert "MIT" in stringio.getvalue() + assert not result + assert "Unused licenses: MIT" in stringio.getvalue() @cpython @@ -140,9 +140,11 @@ def test_lint_read_errors(fake_repository, stringio): (fake_repository / "foo.py").chmod(0o000) project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint_read_errors(report, out=stringio) + data = collect_data_from_report(report) + result = lint(data, out=stringio) - assert "foo.py" in str(list(result)[0]) + assert not result + assert "Could not read:" in stringio.getvalue() assert "foo.py" in stringio.getvalue() @@ -151,9 +153,14 @@ def 
test_lint_files_without_copyright_and_licensing(fake_repository, stringio): (fake_repository / "foo.py").write_text("foo") project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint_files_without_copyright_and_licensing(report, out=stringio) + data = collect_data_from_report(report) + result = lint(data, out=stringio) - assert "foo.py" in str(list(result)[0]) + assert not result + assert ( + "The following files have no copyright and licensing information:" + in stringio.getvalue() + ) assert "foo.py" in stringio.getvalue() From fb32eadad3b818b2987c1ff5d5c4608f9f6ce352 Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:16:57 +0100 Subject: [PATCH 08/60] Add new tests --- tests/test_main.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/test_main.py b/tests/test_main.py index 519491594..4bfdb2308 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -11,6 +11,7 @@ # pylint: disable=redefined-outer-name,unused-argument import errno +import json import os import re from inspect import cleandoc @@ -25,6 +26,7 @@ from reuse._main import main from reuse._util import GIT_EXE, HG_EXE + # REUSE-IgnoreStart @@ -215,6 +217,29 @@ def test_lint_fail_quiet(fake_repository, stringio): assert stringio.getvalue() == "" +def test_lint_json(fake_repository, stringio): + """Run a failed lint.""" + result = main(["lint", "--json"], out=stringio) + output = json.loads(stringio.getvalue()) + + assert result == 0 + assert output["json_version"] == "1.0" + assert len(output["files"]) == 7 + + +def test_lint_json_fail(fake_repository, stringio): + """Run a failed lint.""" + (fake_repository / "foo.py").write_text("foo") + result = main(["lint", "--json"], out=stringio) + output = json.loads(stringio.getvalue()) + + assert result > 0 + assert output["json_version"] == "1.0" + assert len(output["non_compliant"]["missing_licensing_info"]) == 1 + assert 
len(output["non_compliant"]["missing_copyright_info"]) == 1 + assert len(output["files"]) == 8 + + def test_lint_no_file_extension(fake_repository, stringio): """If a license has no file extension, the lint fails.""" (fake_repository / "LICENSES/CC0-1.0.txt").rename( @@ -432,5 +457,4 @@ def test_supported_licenses(stringio): stringio.getvalue(), ) - # REUSE-IgnoreEnd From ff3d3c39aa546043d9b12ad6c535f39d8e8b8cbe Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:31:36 +0100 Subject: [PATCH 09/60] Satisfy pylint --- src/reuse/lint.py | 68 ++++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 24327d1c5..6c4e1c0ac 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -12,7 +12,7 @@ import os import sys from gettext import gettext as _ -from typing import Iterable +from typing import Dict from . import __REUSE_version__ from .project import Project @@ -47,8 +47,12 @@ def collect_data_from_report(report: ProjectReport) -> dict: "licenses_without_extension": [ str(f) for f in report.licenses_without_extension.values() ], - "missing_copyright_info": [str(f) for f in report.files_without_copyright], - "missing_licensing_info": [str(f) for f in report.files_without_licenses], + "missing_copyright_info": [ + str(f) for f in report.files_without_copyright + ], + "missing_licensing_info": [ + str(f) for f in report.files_without_licenses + ], "read_error": [str(f) for f in report.read_errors], }, "files": {}, @@ -62,7 +66,8 @@ def collect_data_from_report(report: ProjectReport) -> dict: copyrights = file.spdxfile.copyright.split("\n") data["files"][str(file.path)] = { "copyrights": [ - {"value": cop, "source": file.spdxfile.name} for cop in copyrights + {"value": cop, "source": file.spdxfile.name} + for cop in copyrights ], "licenses": [ {"value": lic, "source": file.spdxfile.name} @@ -89,15 +94,16 @@ def collect_data_from_report(report: ProjectReport) -> 
dict: "used_licenses": list(report.used_licenses), "files_total": number_of_files, "files_with_copyright_info": number_of_files - - len(report.files_without_copyright), + - len(report.files_without_copyright), "files_with_licensing_info": number_of_files - - len(report.files_without_licenses), + - len(report.files_without_licenses), "compliant": is_compliant, } return data -def format_plain(data) -> str: +# pylint: disable=too-many-locals, too-many-branches, too-many-statements +def format_plain(data: Dict) -> str: """Formats data dictionary as plaintext string to be printed to sys.stdout :param data: Dictionary containing formatted ProjectReport data @@ -109,15 +115,24 @@ def format_plain(data) -> str: if not data["summary"]["compliant"]: # Missing copyright and licensing information - files_without_copyright = set(data["non_compliant"]["missing_copyright_info"]) - files_without_license = set(data["non_compliant"]["missing_licensing_info"]) - files_without_both = files_without_license.intersection(files_without_copyright) + files_without_copyright = set( + data["non_compliant"]["missing_copyright_info"] + ) + files_without_license = set( + data["non_compliant"]["missing_licensing_info"] + ) + files_without_both = files_without_license.intersection( + files_without_copyright + ) - header = "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n" + header = ( + "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n" + ) if files_without_both: output += header output += _( - "The following files have no copyright and licensing " "information:" + "The following files have no copyright and licensing " + "information:" ) output += "\n" for file in sorted(files_without_both): @@ -146,8 +161,8 @@ def format_plain(data) -> str: output += "# " + _("BAD LICENSES") + "\n\n" for lic, files in sorted(bad_licenses.items()): output += f"'{lic}' found in:" + "\n" - for f in sorted(files): - output += f"* {f}\n" + for file in sorted(files): + output += f"* 
{file}\n" output += "\n\n" # Deprecated licenses @@ -160,7 +175,9 @@ def format_plain(data) -> str: output += "\n\n" # Licenses without extension - licenses_without_extension = data["non_compliant"]["licenses_without_extension"] + licenses_without_extension = data["non_compliant"][ + "licenses_without_extension" + ] if licenses_without_extension: output += "# " + _("LICENSES WITHOUT FILE EXTENSION") + "\n\n" output += _("The following licenses have no file extension:") + "\n" @@ -174,8 +191,8 @@ def format_plain(data) -> str: output += "# " + _("MISSING LICENSES") + "\n\n" for lic, files in sorted(missing_licenses.items()): output += f"'{lic}' found in:" + "\n" - for f in sorted(files): - output += f"* {f}\n" + for file in sorted(files): + output += f"* {file}\n" output += "\n" # Unused licenses @@ -210,7 +227,9 @@ def format_plain(data) -> str: ", ".join( [ lic.split("/")[1] - for lic in data["non_compliant"]["licenses_without_extension"] + for lic in data["non_compliant"][ + "licenses_without_extension" + ] ] ), ), @@ -266,21 +285,20 @@ def format_plain(data) -> str: return output -def format_json(data) -> str: +def format_json(data: Dict) -> str: """Formats data dictionary as JSON string ready to be printed to sys.stdout :param data: Dictionary containing formatted ProjectReport data :return: String (representing JSON) that can be output to sys.stdout """ - def set_default(obj): - if isinstance(obj, set): - return list(obj) - - return json.dumps(data, indent=2, default=set_default) + return json.dumps( + # Serialize sets to lists + data, indent=2, default=lambda x: list(x) if isinstance(x, set) else x + ) -def lint(data: dict, formatter=format_plain, out=sys.stdout): +def lint(data: Dict, formatter=format_plain, out=sys.stdout): """Lints the entire project :param data: Dictionary holding formatted ProjectReport data From cfcac8eb6db2f7446d1e9140062685840342de84 Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:39:24 +0100 Subject: [PATCH 10/60] 
Satisfy isort --- tests/test_lint.py | 6 ++++-- tests/test_main.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index 322c09ec5..0cc41dceb 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -10,7 +10,7 @@ import pytest -from reuse.lint import lint, collect_data_from_report +from reuse.lint import collect_data_from_report, lint from reuse.project import Project from reuse.report import ProjectReport @@ -96,7 +96,9 @@ def test_lint_deprecated(fake_repository, stringio): def test_lint_bad_license(fake_repository, stringio): """A bad license is detected.""" - (fake_repository / "foo.py").write_text("SPDX-License-Identifier: bad-license") + (fake_repository / "foo.py").write_text( + "SPDX-License-Identifier: bad-license" + ) project = Project(fake_repository) report = ProjectReport.generate(project) data = collect_data_from_report(report) diff --git a/tests/test_main.py b/tests/test_main.py index 4bfdb2308..c2c6d1481 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -26,7 +26,6 @@ from reuse._main import main from reuse._util import GIT_EXE, HG_EXE - # REUSE-IgnoreStart @@ -457,4 +456,5 @@ def test_supported_licenses(stringio): stringio.getvalue(), ) + # REUSE-IgnoreEnd From d749d51d15e4c0bdbfc2aee55fea26c3636d400d Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:49:35 +0100 Subject: [PATCH 11/60] Make showing of license (not path) work on Windows --- src/reuse/lint.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 6c4e1c0ac..99b388262 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -45,7 +45,7 @@ def collect_data_from_report(report: ProjectReport) -> dict: "deprecated_licenses": [str(f) for f in report.deprecated_licenses], "bad_licenses": report.bad_licenses, "licenses_without_extension": [ - str(f) for f in report.licenses_without_extension.values() + f for f in 
report.licenses_without_extension.values() ], "missing_copyright_info": [ str(f) for f in report.files_without_copyright @@ -94,9 +94,9 @@ def collect_data_from_report(report: ProjectReport) -> dict: "used_licenses": list(report.used_licenses), "files_total": number_of_files, "files_with_copyright_info": number_of_files - - len(report.files_without_copyright), + - len(report.files_without_copyright), "files_with_licensing_info": number_of_files - - len(report.files_without_licenses), + - len(report.files_without_licenses), "compliant": is_compliant, } return data @@ -226,10 +226,9 @@ def format_plain(data: Dict) -> str: _("Licenses without file extension:"), ", ".join( [ - lic.split("/")[1] - for lic in data["non_compliant"][ - "licenses_without_extension" - ] + lic.parts[-1] for lic in data["non_compliant"][ + "licenses_without_extension" + ] ] ), ), @@ -294,7 +293,9 @@ def format_json(data: Dict) -> str: return json.dumps( # Serialize sets to lists - data, indent=2, default=lambda x: list(x) if isinstance(x, set) else x + data, + indent=2, + default=lambda x: list(x) if isinstance(x, set) else x, ) From 5dcd872cbe5137a0a2ee8ffac5c8b80f4ec0d56c Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:53:01 +0100 Subject: [PATCH 12/60] Satisfy pylint (again) --- src/reuse/lint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 99b388262..836458df5 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -45,7 +45,7 @@ def collect_data_from_report(report: ProjectReport) -> dict: "deprecated_licenses": [str(f) for f in report.deprecated_licenses], "bad_licenses": report.bad_licenses, "licenses_without_extension": [ - f for f in report.licenses_without_extension.values() + list(report.licenses_without_extension.values()) ], "missing_copyright_info": [ str(f) for f in report.files_without_copyright From e03fb3ed56c7a32050ad39a2561e61555c2e7ac2 Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 
2023 17:54:52 +0100 Subject: [PATCH 13/60] Add changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 946fc020f..7384b1d42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,7 @@ CLI command and its behaviour. There are no guarantees of stability for the SPDX license expression remains the same: `Apache-2.0 AND CC0-1.0 AND CC-BY-SA-4.0 AND GPL-3.0-or-later`. (#733) - Added `--contributor` option to `annotate`. (#669) +- Added `--json` flag to `lint` command (#654). ### Changed From 89dbd8b10d69d7b0c5ba939e02232ae3d1369ed4 Mon Sep 17 00:00:00 2001 From: linozen Date: Tue, 24 Jan 2023 17:59:17 +0100 Subject: [PATCH 14/60] Fix typo --- src/reuse/lint.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 836458df5..f755a00a5 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -44,9 +44,9 @@ def collect_data_from_report(report: ProjectReport) -> dict: "unused_licenses": [str(f) for f in report.unused_licenses], "deprecated_licenses": [str(f) for f in report.deprecated_licenses], "bad_licenses": report.bad_licenses, - "licenses_without_extension": [ - list(report.licenses_without_extension.values()) - ], + "licenses_without_extension": list( + report.licenses_without_extension.values() + ), "missing_copyright_info": [ str(f) for f in report.files_without_copyright ], @@ -226,7 +226,8 @@ def format_plain(data: Dict) -> str: _("Licenses without file extension:"), ", ".join( [ - lic.parts[-1] for lic in data["non_compliant"][ + file.parts[-1] + for file in data["non_compliant"][ "licenses_without_extension" ] ] From 034a9af5555517f49470121085ac6425faf3e9e0 Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 15 Feb 2023 11:24:04 +0100 Subject: [PATCH 15/60] Move `collect_data_from_report` to `ProjectReport.to_dict` --- src/reuse/lint.py | 98 ++++++--------------------------------------- src/reuse/report.py | 92 
++++++++++++++++++++++++++++++++---------- 2 files changed, 83 insertions(+), 107 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index f755a00a5..624f75d5f 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -14,7 +14,6 @@ from gettext import gettext as _ from typing import Dict -from . import __REUSE_version__ from .project import Project from .report import ProjectReport @@ -29,87 +28,15 @@ def add_arguments(parser): ) -def collect_data_from_report(report: ProjectReport) -> dict: - """Collects and formats data from report and returns it as a dictionary - - :param report: ProjectReport object - :return: Formatted dictionary containing data from the ProjectReport object - """ - # Setup report data container - data = { - "json_version": "1.0", - "reuse_version": __REUSE_version__, - "non_compliant": { - "missing_licenses": report.missing_licenses, - "unused_licenses": [str(f) for f in report.unused_licenses], - "deprecated_licenses": [str(f) for f in report.deprecated_licenses], - "bad_licenses": report.bad_licenses, - "licenses_without_extension": list( - report.licenses_without_extension.values() - ), - "missing_copyright_info": [ - str(f) for f in report.files_without_copyright - ], - "missing_licensing_info": [ - str(f) for f in report.files_without_licenses - ], - "read_error": [str(f) for f in report.read_errors], - }, - "files": {}, - "summary": { - "used_licenses": [], - }, - } - - # Populate 'files' - for file in report.file_reports: - copyrights = file.spdxfile.copyright.split("\n") - data["files"][str(file.path)] = { - "copyrights": [ - {"value": cop, "source": file.spdxfile.name} - for cop in copyrights - ], - "licenses": [ - {"value": lic, "source": file.spdxfile.name} - for lic in file.spdxfile.licenses_in_file - ], - } - - # Populate 'summary' - number_of_files = len(report.file_reports) - is_compliant = not any( - any(result) - for result in ( - data["non_compliant"]["missing_licenses"], - 
data["non_compliant"]["unused_licenses"], - data["non_compliant"]["bad_licenses"], - data["non_compliant"]["deprecated_licenses"], - data["non_compliant"]["licenses_without_extension"], - data["non_compliant"]["missing_copyright_info"], - data["non_compliant"]["missing_licensing_info"], - data["non_compliant"]["read_error"], - ) - ) - data["summary"] = { - "used_licenses": list(report.used_licenses), - "files_total": number_of_files, - "files_with_copyright_info": number_of_files - - len(report.files_without_copyright), - "files_with_licensing_info": number_of_files - - len(report.files_without_licenses), - "compliant": is_compliant, - } - return data - - # pylint: disable=too-many-locals, too-many-branches, too-many-statements -def format_plain(data: Dict) -> str: +def format_plain(report: ProjectReport) -> str: """Formats data dictionary as plaintext string to be printed to sys.stdout - :param data: Dictionary containing formatted ProjectReport data + :param report: ProjectReport data :return: String (in plaintext) that can be output to sys.stdout """ output = "" + data = report.to_dict() # If the project is not compliant: if not data["summary"]["compliant"]: @@ -285,31 +212,32 @@ def format_plain(data: Dict) -> str: return output -def format_json(data: Dict) -> str: +def format_json(report: ProjectReport) -> str: """Formats data dictionary as JSON string ready to be printed to sys.stdout - :param data: Dictionary containing formatted ProjectReport data + :param report: Dictionary containing formatted ProjectReport data :return: String (representing JSON) that can be output to sys.stdout """ return json.dumps( - # Serialize sets to lists - data, + report.to_dict(), indent=2, + # Serialize sets to lists default=lambda x: list(x) if isinstance(x, set) else x, ) -def lint(data: Dict, formatter=format_plain, out=sys.stdout): +def lint(report: ProjectReport, formatter=format_plain, out=sys.stdout) -> bool: """Lints the entire project - :param data: Dictionary holding 
formatted ProjectReport data + :param report: Dictionary holding formatted ProjectReport data :param formatter: Callable that formats the data dictionary :param out: Where to output """ - out.write(formatter(data)) + out.write(formatter(report)) + data = report.to_dict() result = data["summary"]["compliant"] return result @@ -327,8 +255,6 @@ def run(args, project: Project, out=sys.stdout, formatter=format_plain): if args.json: formatter = format_json - data = collect_data_from_report(report) - lint(data, formatter=formatter, out=out) - result = data["summary"]["compliant"] + result = lint(report, formatter=formatter, out=out) return 0 if result else 1 diff --git a/src/reuse/report.py b/src/reuse/report.py index 8426d306c..fda7803fc 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -18,7 +18,7 @@ from typing import Iterable, List, NamedTuple, Optional, Set from uuid import uuid4 -from . import __version__ +from . import __version__, __REUSE_version__ from ._util import _LICENSING, _checksum from .project import Project @@ -79,30 +79,79 @@ def __init__(self, do_checksum: bool = True): self._files_without_copyright = None def to_dict(self): - """Turn the report into a json-like dictionary.""" - return { - "path": str(Path(self.path).resolve()), - "licenses": { - identifier: str(path) - for identifier, path in self.licenses.items() - }, - "bad_licenses": { - lic: [str(file_) for file_ in files] - for lic, files in self.bad_licenses.items() - }, - "deprecated_licenses": sorted(self.deprecated_licenses), - "licenses_without_extension": { - identifier: str(path) - for identifier, path in self.licenses_without_extension.items() + """Collects and formats data from report and returns it as a dictionary + + :param report: ProjectReport object + :return: Formatted dictionary containing data from the ProjectReport object + """ + # Setup report data container + data = { + "json_version": "1.0", + "reuse_version": __REUSE_version__, + "non_compliant": { + 
"missing_licenses": self.missing_licenses, + "unused_licenses": [str(f) for f in self.unused_licenses], + "deprecated_licenses": [str(f) for f in self.deprecated_licenses], + "bad_licenses": self.bad_licenses, + "licenses_without_extension": list( + self.licenses_without_extension.values() + ), + "missing_copyright_info": [ + str(f) for f in self.files_without_copyright + ], + "missing_licensing_info": [ + str(f) for f in self.files_without_licenses + ], + "read_error": [str(f) for f in self.read_errors], }, - "missing_licenses": { - lic: [str(file_) for file_ in files] - for lic, files in self.missing_licenses.items() + "files": {}, + "summary": { + "used_licenses": [], }, - "read_errors": list(map(str, self.read_errors)), - "file_reports": [report.to_dict() for report in self.file_reports], } + # Populate 'files' + for file in self.file_reports: + copyrights = file.spdxfile.copyright.split("\n") + data["files"][str(file.path)] = { + "copyrights": [ + # TODO Find correct source file for copyrights info + {"value": cop, "source": file.spdxfile.name} + for cop in copyrights + ], + "licenses": [ + # TODO Find correct source file for licensing info + {"value": lic, "source": file.spdxfile.name} + for lic in file.spdxfile.licenses_in_file + ], + } + + # Populate 'summary' + number_of_files = len(self.file_reports) + is_compliant = not any( + any(result) + for result in ( + data["non_compliant"]["missing_licenses"], + data["non_compliant"]["unused_licenses"], + data["non_compliant"]["bad_licenses"], + data["non_compliant"]["deprecated_licenses"], + data["non_compliant"]["licenses_without_extension"], + data["non_compliant"]["missing_copyright_info"], + data["non_compliant"]["missing_licensing_info"], + data["non_compliant"]["read_error"], + ) + ) + data["summary"] = { + "used_licenses": list(self.used_licenses), + "files_total": number_of_files, + "files_with_copyright_info": number_of_files + - len(self.files_without_copyright), + "files_with_licensing_info": 
number_of_files + - len(self.files_without_licenses), + "compliant": is_compliant, + } + return data + def bill_of_materials( self, creator_person: Optional[str] = None, @@ -379,6 +428,7 @@ def generate( report.spdxfile.spdx_id = f"SPDXRef-{spdx_id.hexdigest()}" spdx_info = project.spdx_info_of(path) + # TODO Return source of licensing and copyright info together with SPDX info. Depends on #669 for expression in spdx_info.spdx_expressions: for identifier in _LICENSING.license_keys(expression): # A license expression akin to Apache-1.0+ should register From 6bb2a4765a5dfd04653142d96daaa36bfb85afaa Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 15 Feb 2023 11:33:13 +0100 Subject: [PATCH 16/60] (convenience) Add `--format` to easily switch between plain and JSON output --- src/reuse/lint.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 624f75d5f..9ac33f653 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -23,9 +23,11 @@ def add_arguments(parser): parser.add_argument( "-q", "--quiet", action="store_true", help=_("prevents output") ) - parser.add_argument( - "-j", "--json", action="store_true", help=_("formats output as JSON") - ) + mutex_group = parser.add_mutually_exclusive_group() + mutex_group.add_argument("-j", "--json", action="store_true", help=_("formats output as JSON")) + mutex_group.add_argument("-p", "--plain", action="store_true", help=_("formats output as plain text")) + mutex_group.add_argument("--format", nargs="?", choices=("json", "plain"), + help=_("formats output using the chosen formatter")) # pylint: disable=too-many-locals, too-many-branches, too-many-statements @@ -252,8 +254,12 @@ def run(args, project: Project, out=sys.stdout, formatter=format_plain): if args.quiet: out = stack.enter_context(open(os.devnull, "w", encoding="utf-8")) - if args.json: + if args.json or args.format == "json": formatter = format_json + elif args.plain or args.format == 
"plain": + formatter = format_plain + else: + formatter = format_plain result = lint(report, formatter=formatter, out=out) From 6bc172a924bb3b9e9190aed97730885b38da5e77 Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 15 Feb 2023 11:33:47 +0100 Subject: [PATCH 17/60] Get `__REUSE_version__` from report object --- src/reuse/lint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 9ac33f653..c13be85f6 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -204,12 +204,12 @@ def format_plain(report: ProjectReport) -> str: output += _( "Congratulations! Your project is compliant with version" " {} of the REUSE Specification :-)" - ).format(__REUSE_version__) + ).format(data["reuse_version"]) else: output += _( "Unfortunately, your project is not compliant with version " "{} of the REUSE Specification :-(" - ).format(__REUSE_version__) + ).format(data["reuse_version"]) return output From 2a2e44fd28e59f318cd04719b98ff9801b1ec319 Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 15 Feb 2023 11:33:59 +0100 Subject: [PATCH 18/60] Add newline at the end of the output --- src/reuse/lint.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index c13be85f6..21af472da 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -211,6 +211,7 @@ def format_plain(report: ProjectReport) -> str: "{} of the REUSE Specification :-(" ).format(data["reuse_version"]) + output += "\n" return output From 5c25198cb1ff41f2656c47b4875afb08897043d9 Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 15 Feb 2023 11:39:47 +0100 Subject: [PATCH 19/60] Update tests according to code changes --- tests/test_lint.py | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index 0cc41dceb..807ef0001 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -10,7 +10,7 @@ import pytest -from reuse.lint import 
collect_data_from_report, lint +from reuse.lint import lint from reuse.project import Project from reuse.report import ProjectReport @@ -32,8 +32,7 @@ def test_lint_simple(fake_repository): """Extremely simple test for lint.""" project = Project(fake_repository) report = ProjectReport.generate(project) - data = collect_data_from_report(report) - result = lint(data) + result = lint(report) assert result @@ -41,8 +40,7 @@ def test_lint_git(git_repository): """Extremely simple test for lint with a git repository.""" project = Project(git_repository) report = ProjectReport.generate(project) - data = collect_data_from_report(report) - result = lint(data) + result = lint(report) assert result @@ -51,8 +49,7 @@ def test_lint_submodule(submodule_repository): project = Project(submodule_repository) (submodule_repository / "submodule/foo.c").write_text("foo") report = ProjectReport.generate(project) - data = collect_data_from_report(report) - result = lint(data) + result = lint(report) assert result @@ -61,8 +58,7 @@ def test_lint_submodule_included(submodule_repository): project = Project(submodule_repository, include_submodules=True) (submodule_repository / "submodule/foo.c").write_text("foo") report = ProjectReport.generate(project) - data = collect_data_from_report(report) - result = lint(data) + result = lint(report) assert not result @@ -70,8 +66,7 @@ def test_lint_empty_directory(empty_directory): """An empty directory is compliant.""" project = Project(empty_directory) report = ProjectReport.generate(project) - data = collect_data_from_report(report) - result = lint(data) + result = lint(report) assert result @@ -87,8 +82,7 @@ def test_lint_deprecated(fake_repository, stringio): project = Project(fake_repository) report = ProjectReport.generate(project) - data = collect_data_from_report(report) - result = lint(data, out=stringio) + result = lint(report, out=stringio) assert not result assert "GPL-3.0" in stringio.getvalue() @@ -101,8 +95,7 @@ def 
test_lint_bad_license(fake_repository, stringio): ) project = Project(fake_repository) report = ProjectReport.generate(project) - data = collect_data_from_report(report) - result = lint(data, out=stringio) + result = lint(report, out=stringio) assert not result assert "foo.py" in stringio.getvalue() @@ -114,8 +107,7 @@ def test_lint_missing_licenses(fake_repository, stringio): (fake_repository / "foo.py").write_text("SPDX-License-Identifier: MIT") project = Project(fake_repository) report = ProjectReport.generate(project) - data = collect_data_from_report(report) - result = lint(data, out=stringio) + result = lint(report, out=stringio) assert not result assert "foo.py" in stringio.getvalue() @@ -127,8 +119,7 @@ def test_lint_unused_licenses(fake_repository, stringio): (fake_repository / "LICENSES/MIT.txt").write_text("foo") project = Project(fake_repository) report = ProjectReport.generate(project) - data = collect_data_from_report(report) - result = lint(data, out=stringio) + result = lint(report, out=stringio) assert not result assert "Unused licenses: MIT" in stringio.getvalue() @@ -142,8 +133,7 @@ def test_lint_read_errors(fake_repository, stringio): (fake_repository / "foo.py").chmod(0o000) project = Project(fake_repository) report = ProjectReport.generate(project) - data = collect_data_from_report(report) - result = lint(data, out=stringio) + result = lint(report, out=stringio) assert not result assert "Could not read:" in stringio.getvalue() @@ -155,8 +145,7 @@ def test_lint_files_without_copyright_and_licensing(fake_repository, stringio): (fake_repository / "foo.py").write_text("foo") project = Project(fake_repository) report = ProjectReport.generate(project) - data = collect_data_from_report(report) - result = lint(data, out=stringio) + result = lint(report, out=stringio) assert not result assert ( From f697c7375d0c901cdf8903d131b967303ea74166 Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 5 Apr 2023 10:41:24 +0200 Subject: [PATCH 20/60] Fix minor 
issues in `report.py` --- src/reuse/report.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/reuse/report.py b/src/reuse/report.py index fda7803fc..b5dbbdd49 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -93,16 +93,10 @@ def to_dict(self): "unused_licenses": [str(f) for f in self.unused_licenses], "deprecated_licenses": [str(f) for f in self.deprecated_licenses], "bad_licenses": self.bad_licenses, - "licenses_without_extension": list( - self.licenses_without_extension.values() - ), - "missing_copyright_info": [ - str(f) for f in self.files_without_copyright - ], - "missing_licensing_info": [ - str(f) for f in self.files_without_licenses - ], - "read_error": [str(f) for f in self.read_errors], + "licenses_without_extension": self.licenses_without_extension, + "missing_copyright_info": [str(f) for f in self.files_without_copyright], + "missing_licensing_info": [str(f) for f in self.files_without_licenses], + "read_errors": [str(f) for f in self.read_errors], }, "files": {}, "summary": { @@ -111,7 +105,7 @@ def to_dict(self): } # Populate 'files' - for file in self.file_reports: + for file in self.file_reports.copy(): copyrights = file.spdxfile.copyright.split("\n") data["files"][str(file.path)] = { "copyrights": [ @@ -138,7 +132,7 @@ def to_dict(self): data["non_compliant"]["licenses_without_extension"], data["non_compliant"]["missing_copyright_info"], data["non_compliant"]["missing_licensing_info"], - data["non_compliant"]["read_error"], + data["non_compliant"]["read_errors"], ) ) data["summary"] = { @@ -279,11 +273,13 @@ def generate( # File report. project_report.file_reports.add(file_report) - # Bad and missing licenses. + # Missing licenses. 
for missing_license in file_report.missing_licenses: - project_report.missing_licenses.setdefault( - missing_license, set() - ).add(file_report.path) + project_report.missing_licenses.setdefault(missing_license, set()).add( + file_report.path + ) + + # Bad licenses for bad_license in file_report.bad_licenses: project_report.bad_licenses.setdefault(bad_license, set()).add( file_report.path From f20429f4ed23872c92444dacf5d30b19bc19af5e Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 5 Apr 2023 10:41:55 +0200 Subject: [PATCH 21/60] Fix plaintext output --- src/reuse/lint.py | 117 ++++++++++++++++++++++------------------------ 1 file changed, 57 insertions(+), 60 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 21af472da..07b2ffc49 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -12,7 +12,7 @@ import os import sys from gettext import gettext as _ -from typing import Dict +from pathlib import PosixPath from .project import Project from .report import ProjectReport @@ -29,7 +29,6 @@ def add_arguments(parser): mutex_group.add_argument("--format", nargs="?", choices=("json", "plain"), help=_("formats output using the chosen formatter")) - # pylint: disable=too-many-locals, too-many-branches, too-many-statements def format_plain(report: ProjectReport) -> str: """Formats data dictionary as plaintext string to be printed to sys.stdout @@ -42,57 +41,14 @@ def format_plain(report: ProjectReport) -> str: # If the project is not compliant: if not data["summary"]["compliant"]: - - # Missing copyright and licensing information - files_without_copyright = set( - data["non_compliant"]["missing_copyright_info"] - ) - files_without_license = set( - data["non_compliant"]["missing_licensing_info"] - ) - files_without_both = files_without_license.intersection( - files_without_copyright - ) - - header = ( - "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n" - ) - if files_without_both: - output += header - output += _( - "The following files 
have no copyright and licensing " - "information:" - ) - output += "\n" - for file in sorted(files_without_both): - output += f"* {file}\n" - output += "\n\n" - - if files_without_copyright - files_without_both: - output += header - output += _("The following files have no copyright information:") - output += "\n" - for file in sorted(files_without_copyright - files_without_both): - output += f"* {file}\n" - output += "\n\n" - - if files_without_license - files_without_both: - output += header - output += _("The following files have no licensing information:") - output += "\n" - for file in sorted(files_without_license - files_without_both): - output += f"* {file}\n" - output += "\n\n" - # Bad licenses bad_licenses = data["non_compliant"]["bad_licenses"] if bad_licenses: output += "# " + _("BAD LICENSES") + "\n\n" - for lic, files in sorted(bad_licenses.items()): - output += f"'{lic}' found in:" + "\n" - for file in sorted(files): - output += f"* {file}\n" - output += "\n\n" + for lic in sorted(bad_licenses.keys()): + output += _("'{}' found in:").format(lic) + "\n" + output += f"* {list(bad_licenses[lic])[0]}" + "\n\n" + output += "\n" # Deprecated licenses deprecated_licenses = data["non_compliant"]["deprecated_licenses"] @@ -110,31 +66,31 @@ def format_plain(report: ProjectReport) -> str: if licenses_without_extension: output += "# " + _("LICENSES WITHOUT FILE EXTENSION") + "\n\n" output += _("The following licenses have no file extension:") + "\n" - for path in sorted(licenses_without_extension): - output += f"* {str(path)}" + "\n" + for lic in sorted(licenses_without_extension): + output += f"* {str(licenses_without_extension[lic])}" + "\n" output += "\n\n" # Missing licenses missing_licenses = data["non_compliant"]["missing_licenses"] if missing_licenses: output += "# " + _("MISSING LICENSES") + "\n\n" - for lic, files in sorted(missing_licenses.items()): - output += f"'{lic}' found in:" + "\n" - for file in sorted(files): + for lic in 
zip(missing_licenses.keys()): + output += _("'{}' found in:").format(lic[0]) + "\n" + for file in missing_licenses[lic[0]]: output += f"* {file}\n" - output += "\n" + output += "\n\n" # Unused licenses unused_licenses = data["non_compliant"]["unused_licenses"] if unused_licenses: output += "# " + _("UNUSED LICENSES") + "\n\n" output += _("The following licenses are not used:") + "\n" - for lic in sorted(deprecated_licenses): + for lic in sorted(unused_licenses): output += f"* {lic}\n" output += "\n\n" # Read errors - read_errors = data["non_compliant"]["read_error"] + read_errors = data["non_compliant"]["read_errors"] if read_errors: output += "# " + _("READ ERRORS") + "\n\n" output += _("Could not read:") + "\n" @@ -142,6 +98,46 @@ def format_plain(report: ProjectReport) -> str: output += f"* {str(path)}" + "\n" output += "\n\n" + # Missing copyright and licensing information + files_without_copyright = set( + data["non_compliant"]["missing_copyright_info"] + ) + files_without_license = set( + data["non_compliant"]["missing_licensing_info"] + ) + files_without_both = files_without_license.intersection( + files_without_copyright + ) + + header = ( + "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n" + ) + output += header + if files_without_both: + output += _( + "The following files have no copyright and licensing " + "information:" + ) + output += "\n" + for file in sorted(files_without_both): + output += f"* {file}\n" + output += "\n" + + if files_without_copyright - files_without_both: + output += _("The following files have no copyright information:") + output += "\n" + for file in sorted(files_without_copyright - files_without_both): + output += f"* {file}\n" + output += "\n" + + if files_without_license - files_without_both: + output += _("The following files have no licensing information:") + output += "\n" + for file in sorted(files_without_license - files_without_both): + output += f"* {file}\n" + output += "\n" + + output += "\n" output += 
"# " + _("SUMMARY") output += "\n\n" @@ -155,7 +151,7 @@ def format_plain(report: ProjectReport) -> str: _("Licenses without file extension:"), ", ".join( [ - file.parts[-1] + file for file in data["non_compliant"][ "licenses_without_extension" ] @@ -173,7 +169,7 @@ def format_plain(report: ProjectReport) -> str: (_("Used licenses:"), ", ".join(data["summary"]["used_licenses"])), ( _("Read errors: {count}").format( - count=len(data["non_compliant"]["read_error"]) + count=len(data["non_compliant"]["read_errors"]) ), "empty", ), @@ -200,6 +196,7 @@ def format_plain(report: ProjectReport) -> str: value = "" output += "* " + key + " " + value + "\n" + output += "\n" if data["summary"]["compliant"]: output += _( "Congratulations! Your project is compliant with version" @@ -210,8 +207,8 @@ def format_plain(report: ProjectReport) -> str: "Unfortunately, your project is not compliant with version " "{} of the REUSE Specification :-(" ).format(data["reuse_version"]) - output += "\n" + return output From 1b2bd90d059618edbe6f5ba26b4cd28a3bf8ed30 Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 5 Apr 2023 10:42:03 +0200 Subject: [PATCH 22/60] Add custom JSON serializer --- src/reuse/lint.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 07b2ffc49..71ddbb739 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -219,11 +219,24 @@ def format_json(report: ProjectReport) -> str: :return: String (representing JSON) that can be output to sys.stdout """ + output = report.to_dict() + + def custom_serializer(obj): + """Custom serializer for the dictionary output of ProjectReport + + :param obj: Object to be serialized + """ + if isinstance(obj, PosixPath): + return str(obj) + if isinstance(obj, set): + return list(obj) + raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable") + return json.dumps( report.to_dict(), indent=2, # Serialize sets to lists - default=lambda x: 
list(x) if isinstance(x, set) else x, + default=custom_serializer ) From d98d162715ed0710362a7b9243af24cc5f311610 Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 5 Apr 2023 11:18:45 +0200 Subject: [PATCH 23/60] Adapt `SpdxInfo` namedtuple to contain source path --- src/reuse/__init__.py | 3 ++- src/reuse/_util.py | 5 +++-- src/reuse/header.py | 3 ++- src/reuse/project.py | 8 +++++--- tests/test_header.py | 36 ++++++++++++++++++------------------ tests/test_util.py | 2 +- 6 files changed, 31 insertions(+), 26 deletions(-) diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py index 70b097454..dd770baeb 100644 --- a/src/reuse/__init__.py +++ b/src/reuse/__init__.py @@ -20,7 +20,7 @@ import os import re from dataclasses import dataclass, field -from typing import NamedTuple, Set +from typing import NamedTuple, Optional, Set try: from importlib.metadata import PackageNotFoundError, version @@ -95,6 +95,7 @@ class SpdxInfo: spdx_expressions: Set[Expression] = field(default_factory=set) copyright_lines: Set[str] = field(default_factory=set) contributor_lines: Set[str] = field(default_factory=set) + license_path: Optional[str] = None def __bool__(self): return any(self.__dict__.values()) diff --git a/src/reuse/_util.py b/src/reuse/_util.py index b50fb080f..19780a3b8 100644 --- a/src/reuse/_util.py +++ b/src/reuse/_util.py @@ -208,11 +208,12 @@ def _copyright_from_dep5(path: PathLike, dep5_copyright: Copyright) -> SpdxInfo: result = dep5_copyright.find_files_paragraph(Path(path).as_posix()) if result is None: - return SpdxInfo(set(), set()) + return SpdxInfo(set(), set(), str(path)) return SpdxInfo( set(map(_LICENSING.parse, [result.license.synopsis])), set(map(str.strip, result.copyright.splitlines())), + str(path) ) @@ -316,7 +317,7 @@ def extract_spdx_info(text: str) -> SpdxInfo: copyright_matches.add(match.groupdict()["copyright"].strip()) break - return SpdxInfo(expressions, copyright_matches) + return SpdxInfo(expressions, copyright_matches, "") def 
find_license_identifiers(text: str) -> Iterator[str]: diff --git a/src/reuse/header.py b/src/reuse/header.py index 520fcaf28..8454c283d 100644 --- a/src/reuse/header.py +++ b/src/reuse/header.py @@ -178,6 +178,7 @@ def create_header( spdx_info.spdx_expressions.union(existing_spdx.spdx_expressions), spdx_copyrights, spdx_info.contributor_lines.union(existing_spdx.contributor_lines), + "", ) new_header += _create_new_header( @@ -784,7 +785,7 @@ def run(args, project: Project, out=sys.stdout) -> int: set(args.contributor) if args.contributor is not None else set() ) - spdx_info = SpdxInfo(expressions, copyright_lines, contributors) + spdx_info = SpdxInfo(expressions, copyright_lines, contributors, "") result = 0 for path in paths: diff --git a/src/reuse/project.py b/src/reuse/project.py index 7801a1294..b47175fa4 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -144,13 +144,14 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: """Return SPDX info of *path*. This function will return any SPDX information that it can find, both - from within the file and from the .reuse/dep5 file. + from within the file, the .license file and from the .reuse/dep5 file. """ path = _determine_license_path(path) _LOGGER.debug(f"searching '{path}' for SPDX information") - dep5_result = SpdxInfo(set(), set()) - file_result = SpdxInfo(set(), set()) + # NOTE This means that only one 'source' of licensing/copyright information is captured in SpdxInfo + dep5_result = SpdxInfo(set(), set(), "") + file_result = SpdxInfo(set(), set(), "") # Search the .reuse/dep5 file for SPDX information. 
if self._copyright: @@ -188,6 +189,7 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: return SpdxInfo( dep5_result.spdx_expressions.union(file_result.spdx_expressions), dep5_result.copyright_lines.union(file_result.copyright_lines), + str(path) ) def relative_from_root(self, path: Path) -> Path: diff --git a/tests/test_header.py b/tests/test_header.py index c97d41443..5eb3e80b1 100644 --- a/tests/test_header.py +++ b/tests/test_header.py @@ -24,7 +24,7 @@ def test_create_header_simple(): """Create a super simple header.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) expected = cleandoc( """ @@ -57,7 +57,7 @@ def test_create_header_simple_with_contributor(): def test_create_header_template_simple(template_simple): """Create a header with a simple template.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) expected = cleandoc( """ @@ -77,7 +77,7 @@ def test_create_header_template_simple(template_simple): def test_create_header_template_no_spdx(template_no_spdx): """Create a header with a template that does not have all SPDX info.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) with pytest.raises(MissingSpdxInfo): @@ -87,7 +87,7 @@ def test_create_header_template_no_spdx(template_no_spdx): def test_create_header_template_commented(template_commented): """Create a header with an already-commented template.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) expected = cleandoc( """ @@ -113,7 +113,7 @@ def test_create_header_template_commented(template_commented): def test_create_header_already_contains_spdx(): """Create a new header from a header that 
already contains SPDX info.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) existing = cleandoc( """ @@ -138,7 +138,7 @@ def test_create_header_already_contains_spdx(): def test_create_header_existing_is_wrong(): """If the existing header contains errors, raise a CommentCreateError.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) existing = cleandoc( """ @@ -154,7 +154,7 @@ def test_create_header_existing_is_wrong(): def test_create_header_old_syntax(): """Old copyright syntax is preserved when creating a new header.""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") existing = cleandoc( """ # Copyright John Doe @@ -173,7 +173,7 @@ def test_create_header_old_syntax(): def test_create_header_remove_fluff(): """Any stuff that isn't SPDX info is removed when using create_header.""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") existing = cleandoc( """ # SPDX-FileCopyrightText: John Doe @@ -199,7 +199,7 @@ def test_add_new_header_simple(): the old one. 
""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) text = cleandoc( """ @@ -229,7 +229,7 @@ def test_add_new_header_simple(): def test_find_and_replace_no_header(): """Given text without header, add a header.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) text = "pass" expected = cleandoc( @@ -251,7 +251,7 @@ def test_find_and_replace_no_header(): def test_find_and_replace_verbatim(): """Replace a header with itself.""" - spdx_info = SpdxInfo(set(), set()) + spdx_info = SpdxInfo(set(), set(), "") text = cleandoc( """ # SPDX-FileCopyrightText: Jane Doe @@ -270,7 +270,7 @@ def test_find_and_replace_newline_before_header(): preceding whitespace. """ spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, "" ) text = cleandoc( """ @@ -297,7 +297,7 @@ def test_find_and_replace_newline_before_header(): def test_find_and_replace_preserve_preceding(): """When the SPDX header is in the middle of the file, keep it there.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, "" ) text = cleandoc( """ @@ -335,7 +335,7 @@ def test_find_and_replace_keep_shebang(): it. """ spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, "" ) text = cleandoc( """ @@ -366,7 +366,7 @@ def test_find_and_replace_separate_shebang(): """When the shebang is part of the same comment as the SPDX comment, separate the two. 
""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") text = cleandoc( """ #!/usr/bin/env python3 @@ -394,7 +394,7 @@ def test_find_and_replace_separate_shebang(): def test_find_and_replace_only_shebang(): """When the file only contains a shebang, keep it at the top of the file.""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") text = cleandoc( """ #!/usr/bin/env python3 @@ -424,7 +424,7 @@ def test_find_and_replace_keep_old_comment(): licensing information, preserve it below the REUSE header. """ spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) text = cleandoc( """ @@ -451,7 +451,7 @@ def test_find_and_replace_keep_old_comment(): def test_find_and_replace_preserve_newline(): """If the file content ends with a newline, don't remove it.""" - spdx_info = SpdxInfo(set(), set()) + spdx_info = SpdxInfo(set(), set(), "") text = ( cleandoc( """ diff --git a/tests/test_util.py b/tests/test_util.py index efa09b631..49011e65d 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -78,7 +78,7 @@ def test_extract_no_info(): object. 
""" result = _util.extract_spdx_info("") - assert result == _util.SpdxInfo(set(), set()) + assert result == _util.SpdxInfo(set(), set(), "") def test_extract_tab(): From 9c106f14f3b8335e78004326227796ca64c18b99 Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 5 Apr 2023 11:19:48 +0200 Subject: [PATCH 24/60] Remove useless line --- src/reuse/lint.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 71ddbb739..9bac4e6a7 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -219,8 +219,6 @@ def format_json(report: ProjectReport) -> str: :return: String (representing JSON) that can be output to sys.stdout """ - output = report.to_dict() - def custom_serializer(obj): """Custom serializer for the dictionary output of ProjectReport From 9beac70726695768ae642ccd757ad8ea95374e61 Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 5 Apr 2023 11:56:41 +0200 Subject: [PATCH 25/60] Actually return license/copyright source path --- src/reuse/project.py | 7 +++++++ src/reuse/report.py | 6 ++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index b47175fa4..f589b3cb4 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -145,10 +145,14 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: This function will return any SPDX information that it can find, both from within the file, the .license file and from the .reuse/dep5 file. 
+ + It also returns a single primary source path of the license/copyright + information, where 'primary' means '.license file' > 'header' > 'dep5' """ path = _determine_license_path(path) _LOGGER.debug(f"searching '{path}' for SPDX information") + license_path = "" # NOTE This means that only one 'source' of licensing/copyright information is captured in SpdxInfo dep5_result = SpdxInfo(set(), set(), "") file_result = SpdxInfo(set(), set(), "") @@ -162,6 +166,7 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: _LOGGER.info( _("'{path}' covered by .reuse/dep5").format(path=path) ) + license_path = ".reuse/dep5" # Search the file for SPDX information. with path.open("rb") as fp: @@ -178,6 +183,8 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: file_result = extract_spdx_info( decoded_text_from_binary(fp, size=read_limit) ) + if any(file_result): + license_path = str(path) except (ExpressionError, ParseError): _LOGGER.error( _( diff --git a/src/reuse/report.py b/src/reuse/report.py index b5dbbdd49..76a2385c1 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -109,13 +109,11 @@ def to_dict(self): copyrights = file.spdxfile.copyright.split("\n") data["files"][str(file.path)] = { "copyrights": [ - # TODO Find correct source file for copyrights info - {"value": cop, "source": file.spdxfile.name} + {"value": cop, "source": file.spdxfile.info.license_path} for cop in copyrights ], "licenses": [ - # TODO Find correct source file for licensing info - {"value": lic, "source": file.spdxfile.name} + {"value": lic, "source": file.spdxfile.info.license_path} for lic in file.spdxfile.licenses_in_file ], } From c43c91fbc5afacd0a745b663e9f1f2b97aea837b Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 5 Apr 2023 12:07:44 +0200 Subject: [PATCH 26/60] Determine license source correctly --- src/reuse/project.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index f589b3cb4..fa85d87a7 100644 --- 
a/src/reuse/project.py +++ b/src/reuse/project.py @@ -193,10 +193,12 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: ).format(path=path) ) + spdx_expressions = dep5_result.spdx_expressions.union(file_result.spdx_expressions) + copyright_lines = dep5_result.copyright_lines.union(file_result.copyright_lines) return SpdxInfo( - dep5_result.spdx_expressions.union(file_result.spdx_expressions), - dep5_result.copyright_lines.union(file_result.copyright_lines), - str(path) + spdx_expressions, + copyright_lines, + license_path ) def relative_from_root(self, path: Path) -> Path: From 6fc5ec118c05fd15845d0bcb53ad52bdd36600ee Mon Sep 17 00:00:00 2001 From: linozen Date: Wed, 5 Apr 2023 12:08:05 +0200 Subject: [PATCH 27/60] Allow returning empty arrays --- src/reuse/report.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/reuse/report.py b/src/reuse/report.py index 76a2385c1..2ecfbeac8 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -110,11 +110,11 @@ def to_dict(self): data["files"][str(file.path)] = { "copyrights": [ {"value": cop, "source": file.spdxfile.info.license_path} - for cop in copyrights + for cop in copyrights if cop ], "licenses": [ {"value": lic, "source": file.spdxfile.info.license_path} - for lic in file.spdxfile.licenses_in_file + for lic in file.spdxfile.licenses_in_file if lic ], } @@ -364,6 +364,7 @@ def __init__(self, name, spdx_id=None, chk_sum=None): self.licenses_in_file: List[str] = [] self.license_concluded: str = None self.copyright: str = None + self.info: SpdxInfo = None class FileReport: @@ -462,7 +463,7 @@ def generate( # Copyright text report.spdxfile.copyright = "\n".join(sorted(spdx_info.copyright_lines)) - + report.spdxfile.info = spdx_info return report def __hash__(self): From 3ad1fe38fadc61469113f44fe1aa215d025f5c48 Mon Sep 17 00:00:00 2001 From: linozen Date: Thu, 6 Apr 2023 08:10:58 +0200 Subject: [PATCH 28/60] Satisfy black --- src/reuse/_util.py | 2 +- src/reuse/lint.py | 35 
++++++++++++++++++++------------- src/reuse/project.py | 16 ++++++++------- src/reuse/report.py | 47 +++++++++++++++++++++++++------------------- 4 files changed, 58 insertions(+), 42 deletions(-) diff --git a/src/reuse/_util.py b/src/reuse/_util.py index 19780a3b8..bb60a9244 100644 --- a/src/reuse/_util.py +++ b/src/reuse/_util.py @@ -213,7 +213,7 @@ def _copyright_from_dep5(path: PathLike, dep5_copyright: Copyright) -> SpdxInfo: return SpdxInfo( set(map(_LICENSING.parse, [result.license.synopsis])), set(map(str.strip, result.copyright.splitlines())), - str(path) + str(path), ) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 9bac4e6a7..b12964387 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -24,10 +24,22 @@ def add_arguments(parser): "-q", "--quiet", action="store_true", help=_("prevents output") ) mutex_group = parser.add_mutually_exclusive_group() - mutex_group.add_argument("-j", "--json", action="store_true", help=_("formats output as JSON")) - mutex_group.add_argument("-p", "--plain", action="store_true", help=_("formats output as plain text")) - mutex_group.add_argument("--format", nargs="?", choices=("json", "plain"), - help=_("formats output using the chosen formatter")) + mutex_group.add_argument( + "-j", "--json", action="store_true", help=_("formats output as JSON") + ) + mutex_group.add_argument( + "-p", + "--plain", + action="store_true", + help=_("formats output as plain text"), + ) + mutex_group.add_argument( + "--format", + nargs="?", + choices=("json", "plain"), + help=_("formats output using the chosen formatter"), + ) + # pylint: disable=too-many-locals, too-many-branches, too-many-statements def format_plain(report: ProjectReport) -> str: @@ -149,14 +161,7 @@ def format_plain(report: ProjectReport) -> str: ), ( _("Licenses without file extension:"), - ", ".join( - [ - file - for file in data["non_compliant"][ - "licenses_without_extension" - ] - ] - ), + ", ".join(data["non_compliant"]["licenses_without_extension"]), ), ( 
_("Missing licenses:"), @@ -228,13 +233,15 @@ def custom_serializer(obj): return str(obj) if isinstance(obj, set): return list(obj) - raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable") + raise TypeError( + f"Object of type {obj.__class__.__name__} is not JSON serializable" + ) return json.dumps( report.to_dict(), indent=2, # Serialize sets to lists - default=custom_serializer + default=custom_serializer, ) diff --git a/src/reuse/project.py b/src/reuse/project.py index fa85d87a7..2f93556ec 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -153,7 +153,9 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: _LOGGER.debug(f"searching '{path}' for SPDX information") license_path = "" - # NOTE This means that only one 'source' of licensing/copyright information is captured in SpdxInfo + + # This means that only one 'source' of licensing/copyright information + # is captured in SpdxInfo dep5_result = SpdxInfo(set(), set(), "") file_result = SpdxInfo(set(), set(), "") @@ -193,13 +195,13 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: ).format(path=path) ) - spdx_expressions = dep5_result.spdx_expressions.union(file_result.spdx_expressions) - copyright_lines = dep5_result.copyright_lines.union(file_result.copyright_lines) - return SpdxInfo( - spdx_expressions, - copyright_lines, - license_path + spdx_expressions = dep5_result.spdx_expressions.union( + file_result.spdx_expressions + ) + copyright_lines = dep5_result.copyright_lines.union( + file_result.copyright_lines ) + return SpdxInfo(spdx_expressions, copyright_lines, license_path) def relative_from_root(self, path: Path) -> Path: """If the project root is /tmp/project, and *path* is diff --git a/src/reuse/report.py b/src/reuse/report.py index 2ecfbeac8..d7e7b57d7 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -18,9 +18,9 @@ from typing import Iterable, List, NamedTuple, Optional, Set from uuid import uuid4 -from . 
import __version__, __REUSE_version__ +from . import __REUSE_version__, __version__ from ._util import _LICENSING, _checksum -from .project import Project +from .project import Project, SpdxInfo _LOGGER = logging.getLogger(__name__) @@ -82,7 +82,7 @@ def to_dict(self): """Collects and formats data from report and returns it as a dictionary :param report: ProjectReport object - :return: Formatted dictionary containing data from the ProjectReport object + :return: Dictionary containing data from the ProjectReport object """ # Setup report data container data = { @@ -90,13 +90,19 @@ def to_dict(self): "reuse_version": __REUSE_version__, "non_compliant": { "missing_licenses": self.missing_licenses, - "unused_licenses": [str(f) for f in self.unused_licenses], - "deprecated_licenses": [str(f) for f in self.deprecated_licenses], + "unused_licenses": [str(file) for file in self.unused_licenses], + "deprecated_licenses": [ + str(file) for file in self.deprecated_licenses + ], "bad_licenses": self.bad_licenses, "licenses_without_extension": self.licenses_without_extension, - "missing_copyright_info": [str(f) for f in self.files_without_copyright], - "missing_licensing_info": [str(f) for f in self.files_without_licenses], - "read_errors": [str(f) for f in self.read_errors], + "missing_copyright_info": [ + str(file) for file in self.files_without_copyright + ], + "missing_licensing_info": [ + str(file) for file in self.files_without_licenses + ], + "read_errors": [str(file) for file in self.read_errors], }, "files": {}, "summary": { @@ -110,11 +116,13 @@ def to_dict(self): data["files"][str(file.path)] = { "copyrights": [ {"value": cop, "source": file.spdxfile.info.license_path} - for cop in copyrights if cop + for cop in copyrights + if cop ], "licenses": [ {"value": lic, "source": file.spdxfile.info.license_path} - for lic in file.spdxfile.licenses_in_file if lic + for lic in file.spdxfile.licenses_in_file + if lic ], } @@ -137,9 +145,9 @@ def to_dict(self): "used_licenses": 
list(self.used_licenses), "files_total": number_of_files, "files_with_copyright_info": number_of_files - - len(self.files_without_copyright), + - len(self.files_without_copyright), "files_with_licensing_info": number_of_files - - len(self.files_without_licenses), + - len(self.files_without_licenses), "compliant": is_compliant, } return data @@ -163,8 +171,7 @@ def bill_of_materials( # TODO: Generate UUID from git revision maybe # TODO: Fix the URL out.write( - f"DocumentNamespace:" - f" http://spdx.org/spdxdocs/spdx-v2.1-{uuid4()}\n" + f"DocumentNamespace: http://spdx.org/spdxdocs/spdx-v2.1-{uuid4()}\n" ) # Author @@ -187,7 +194,7 @@ def bill_of_materials( for report in reports: out.write( - f"Relationship: SPDXRef-DOCUMENT describes" + "Relationship: SPDXRef-DOCUMENT describes" f" {report.spdxfile.spdx_id}\n" ) @@ -204,7 +211,7 @@ def bill_of_materials( out.write(f"LicenseInfoInFile: {lic}\n") if report.spdxfile.copyright: out.write( - f"FileCopyrightText:" + "FileCopyrightText:" f" {report.spdxfile.copyright}\n" ) else: @@ -273,9 +280,9 @@ def generate( # Missing licenses. for missing_license in file_report.missing_licenses: - project_report.missing_licenses.setdefault(missing_license, set()).add( - file_report.path - ) + project_report.missing_licenses.setdefault( + missing_license, set() + ).add(file_report.path) # Bad licenses for bad_license in file_report.bad_licenses: @@ -423,7 +430,6 @@ def generate( report.spdxfile.spdx_id = f"SPDXRef-{spdx_id.hexdigest()}" spdx_info = project.spdx_info_of(path) - # TODO Return source of licensing and copyright info together with SPDX info. 
Depends on #669 for expression in spdx_info.spdx_expressions: for identifier in _LICENSING.license_keys(expression): # A license expression akin to Apache-1.0+ should register @@ -463,6 +469,7 @@ def generate( # Copyright text report.spdxfile.copyright = "\n".join(sorted(spdx_info.copyright_lines)) + # Source of licensing and copyright info report.spdxfile.info = spdx_info return report From 2fcc8ebfd28f189aefaf628f4d0c120c797bf4a1 Mon Sep 17 00:00:00 2001 From: linozen Date: Thu, 6 Apr 2023 08:21:23 +0200 Subject: [PATCH 29/60] Ensure consistency of the JSON output --- src/reuse/lint.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index b12964387..cf9544d8d 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -242,6 +242,7 @@ def custom_serializer(obj): indent=2, # Serialize sets to lists default=custom_serializer, + sort_keys=True, ) From d50d15e9bd9e92e77fa218d551d2777b9cf35087 Mon Sep 17 00:00:00 2001 From: linozen Date: Thu, 6 Apr 2023 08:25:52 +0200 Subject: [PATCH 30/60] Ensure consistency of the plaintext output --- src/reuse/lint.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index cf9544d8d..356784ed4 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -86,9 +86,9 @@ def format_plain(report: ProjectReport) -> str: missing_licenses = data["non_compliant"]["missing_licenses"] if missing_licenses: output += "# " + _("MISSING LICENSES") + "\n\n" - for lic in zip(missing_licenses.keys()): - output += _("'{}' found in:").format(lic[0]) + "\n" - for file in missing_licenses[lic[0]]: + for lic in sorted(missing_licenses.keys()): + output += _("'{}' found in:").format(lic) + "\n" + for file in sorted(missing_licenses[lic]): output += f"* {file}\n" output += "\n\n" From fbf6e534ef96bf2ed45129b289678e3517f4d965 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Mon, 10 Apr 2023 18:21:40 +0200 Subject: [PATCH 31/60] Move logic to 
FileReport.to_dict and rename method to to_dict_lint Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 6 +++--- src/reuse/report.py | 47 +++++++++++++++++++++----------------------- tests/test_report.py | 4 ++-- 3 files changed, 27 insertions(+), 30 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 356784ed4..a0d568b1a 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -49,7 +49,7 @@ def format_plain(report: ProjectReport) -> str: :return: String (in plaintext) that can be output to sys.stdout """ output = "" - data = report.to_dict() + data = report.to_dict_lint() # If the project is not compliant: if not data["summary"]["compliant"]: @@ -238,7 +238,7 @@ def custom_serializer(obj): ) return json.dumps( - report.to_dict(), + report.to_dict_lint(), indent=2, # Serialize sets to lists default=custom_serializer, @@ -256,7 +256,7 @@ def lint(report: ProjectReport, formatter=format_plain, out=sys.stdout) -> bool: out.write(formatter(report)) - data = report.to_dict() + data = report.to_dict_lint() result = data["summary"]["compliant"] return result diff --git a/src/reuse/report.py b/src/reuse/report.py index d7e7b57d7..982afaa03 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -78,10 +78,10 @@ def __init__(self, do_checksum: bool = True): self._files_without_licenses = None self._files_without_copyright = None - def to_dict(self): - """Collects and formats data from report and returns it as a dictionary + def to_dict_lint(self): + """Collects and formats data relevant to linting from report and returns + it as a dictionary. 
- :param report: ProjectReport object :return: Dictionary containing data from the ProjectReport object """ # Setup report data container @@ -104,27 +104,15 @@ def to_dict(self): ], "read_errors": [str(file) for file in self.read_errors], }, - "files": {}, + "files": [], "summary": { "used_licenses": [], }, } # Populate 'files' - for file in self.file_reports.copy(): - copyrights = file.spdxfile.copyright.split("\n") - data["files"][str(file.path)] = { - "copyrights": [ - {"value": cop, "source": file.spdxfile.info.license_path} - for cop in copyrights - if cop - ], - "licenses": [ - {"value": lic, "source": file.spdxfile.info.license_path} - for lic in file.spdxfile.licenses_in_file - if lic - ], - } + for file_report in self.file_reports: + data["files"].append(file_report.to_dict_lint()) # Populate 'summary' number_of_files = len(self.file_reports) @@ -389,15 +377,24 @@ def __init__( self.bad_licenses = set() self.missing_licenses = set() - def to_dict(self): - """Turn the report into a json-like dictionary.""" + def to_dict_lint(self): + """Turn the report into a json-like dictionary with exclusively + information relevant for linting. + """ return { "path": str(Path(self.path).resolve()), - "name": self.spdxfile.name, - "spdx_id": self.spdxfile.spdx_id, - "chk_sum": self.spdxfile.chk_sum, - "licenses_in_file": sorted(self.spdxfile.licenses_in_file), - "copyright": self.spdxfile.copyright, + # TODO: Why does every copyright line have the same source? + "copyrights": [ + {"value": copyright_, "source": self.spdxfile.info.license_path} + for copyright_ in self.spdxfile.copyright.split("\n") + if copyright_ + ], + # TODO: Why does every license expression have the same source? 
+ "licenses": [ + {"value": license_, "source": self.spdxfile.info.license_path} + for license_ in self.spdxfile.licenses_in_file + if license_ + ], } @classmethod diff --git a/tests/test_report.py b/tests/test_report.py index 1ff0da58c..5a5c77c49 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -378,12 +378,12 @@ def test_generate_project_report_read_error(fake_repository, multiprocessing): assert (fake_repository / "bad") in result.read_errors -def test_generate_project_report_to_dict(fake_repository, multiprocessing): +def test_generate_project_report_to_dict_lint(fake_repository, multiprocessing): """Extremely simple test for ProjectReport.to_dict.""" project = Project(fake_repository) report = ProjectReport.generate(project, multiprocessing=multiprocessing) # TODO: Actually do something - report.to_dict() + report.to_dict_lint() def test_bill_of_materials(fake_repository, multiprocessing): From bd5946573a3a309eb0d92d77e54247266e2a3c65 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Mon, 10 Apr 2023 18:32:23 +0200 Subject: [PATCH 32/60] Create ProjectReport.is_compliant Signed-off-by: Carmen Bianca BAKKER --- src/reuse/report.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/reuse/report.py b/src/reuse/report.py index 982afaa03..63c125122 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -77,6 +77,7 @@ def __init__(self, do_checksum: bool = True): self._used_licenses = None self._files_without_licenses = None self._files_without_copyright = None + self._is_compliant = None def to_dict_lint(self): """Collects and formats data relevant to linting from report and returns @@ -116,19 +117,6 @@ def to_dict_lint(self): # Populate 'summary' number_of_files = len(self.file_reports) - is_compliant = not any( - any(result) - for result in ( - data["non_compliant"]["missing_licenses"], - data["non_compliant"]["unused_licenses"], - data["non_compliant"]["bad_licenses"], - 
data["non_compliant"]["deprecated_licenses"], - data["non_compliant"]["licenses_without_extension"], - data["non_compliant"]["missing_copyright_info"], - data["non_compliant"]["missing_licensing_info"], - data["non_compliant"]["read_errors"], - ) - ) data["summary"] = { "used_licenses": list(self.used_licenses), "files_total": number_of_files, @@ -136,7 +124,7 @@ def to_dict_lint(self): - len(self.files_without_copyright), "files_with_licensing_info": number_of_files - len(self.files_without_licenses), - "compliant": is_compliant, + "compliant": self.is_compliant, } return data @@ -346,6 +334,27 @@ def files_without_copyright(self) -> Iterable[PathLike]: return self._files_without_copyright + @property + def is_compliant(self) -> bool: + """Whether the report is compliant with the REUSE Spec.""" + if self._is_compliant is not None: + return self._is_compliant + + self._is_compliant = not any( + ( + self.missing_licenses, + self.unused_licenses, + self.bad_licenses, + self.deprecated_licenses, + self.licenses_without_extension, + self.files_without_copyright, + self.files_without_licenses, + self.read_errors, + ) + ) + + return self._is_compliant + class _File: # pylint: disable=too-few-public-methods """Represent an SPDX file. 
Sufficiently enough for our purposes, in any From 8f3b5d0d33feda9ff19baee0f9e18c5c4db1c2ad Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Mon, 10 Apr 2023 18:33:36 +0200 Subject: [PATCH 33/60] Rename json_version to lint_version Signed-off-by: Carmen Bianca BAKKER --- src/reuse/report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reuse/report.py b/src/reuse/report.py index 63c125122..9704c2a38 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -87,7 +87,7 @@ def to_dict_lint(self): """ # Setup report data container data = { - "json_version": "1.0", + "lint_version": "1.0", "reuse_version": __REUSE_version__, "non_compliant": { "missing_licenses": self.missing_licenses, From 8feed9e53dd2f669603dc253e79c9306d50ae386 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Mon, 10 Apr 2023 19:07:17 +0200 Subject: [PATCH 34/60] Remove superfluous function Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index a0d568b1a..9a5751d0d 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -246,21 +246,6 @@ def custom_serializer(obj): ) -def lint(report: ProjectReport, formatter=format_plain, out=sys.stdout) -> bool: - """Lints the entire project - - :param report: Dictionary holding formatted ProjectReport data - :param formatter: Callable that formats the data dictionary - :param out: Where to output - """ - - out.write(formatter(report)) - - data = report.to_dict_lint() - result = data["summary"]["compliant"] - return result - - def run(args, project: Project, out=sys.stdout, formatter=format_plain): """List all non-compliant files.""" report = ProjectReport.generate( @@ -278,6 +263,6 @@ def run(args, project: Project, out=sys.stdout, formatter=format_plain): else: formatter = format_plain - result = lint(report, formatter=formatter, out=out) + out.write(formatter(report)) - return 0 if 
result else 1 + return 0 if report.is_compliant else 1 From 7423f622480def5693229ff4449af3f71f6848ed Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Thu, 11 May 2023 11:57:54 +0200 Subject: [PATCH 35/60] Make tests run by importing an existing function Signed-off-by: Carmen Bianca BAKKER --- tests/test_lint.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index 807ef0001..0fb42713f 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -10,7 +10,8 @@ import pytest -from reuse.lint import lint +# FIXME: Verify whether format_plain is the thing being tested. +from reuse.lint import format_plain from reuse.project import Project from reuse.report import ProjectReport @@ -32,7 +33,7 @@ def test_lint_simple(fake_repository): """Extremely simple test for lint.""" project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint(report) + result = format_plain(report) assert result @@ -40,7 +41,7 @@ def test_lint_git(git_repository): """Extremely simple test for lint with a git repository.""" project = Project(git_repository) report = ProjectReport.generate(project) - result = lint(report) + result = format_plain(report) assert result @@ -49,7 +50,7 @@ def test_lint_submodule(submodule_repository): project = Project(submodule_repository) (submodule_repository / "submodule/foo.c").write_text("foo") report = ProjectReport.generate(project) - result = lint(report) + result = format_plain(report) assert result @@ -58,7 +59,7 @@ def test_lint_submodule_included(submodule_repository): project = Project(submodule_repository, include_submodules=True) (submodule_repository / "submodule/foo.c").write_text("foo") report = ProjectReport.generate(project) - result = lint(report) + result = format_plain(report) assert not result @@ -66,7 +67,7 @@ def test_lint_empty_directory(empty_directory): """An empty directory is compliant.""" project = 
Project(empty_directory) report = ProjectReport.generate(project) - result = lint(report) + result = format_plain(report) assert result @@ -82,7 +83,7 @@ def test_lint_deprecated(fake_repository, stringio): project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint(report, out=stringio) + result = format_plain(report, out=stringio) assert not result assert "GPL-3.0" in stringio.getvalue() @@ -95,7 +96,7 @@ def test_lint_bad_license(fake_repository, stringio): ) project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint(report, out=stringio) + result = format_plain(report, out=stringio) assert not result assert "foo.py" in stringio.getvalue() @@ -107,7 +108,7 @@ def test_lint_missing_licenses(fake_repository, stringio): (fake_repository / "foo.py").write_text("SPDX-License-Identifier: MIT") project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint(report, out=stringio) + result = format_plain(report, out=stringio) assert not result assert "foo.py" in stringio.getvalue() @@ -119,7 +120,7 @@ def test_lint_unused_licenses(fake_repository, stringio): (fake_repository / "LICENSES/MIT.txt").write_text("foo") project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint(report, out=stringio) + result = format_plain(report, out=stringio) assert not result assert "Unused licenses: MIT" in stringio.getvalue() @@ -133,7 +134,7 @@ def test_lint_read_errors(fake_repository, stringio): (fake_repository / "foo.py").chmod(0o000) project = Project(fake_repository) report = ProjectReport.generate(project) - result = lint(report, out=stringio) + result = format_plain(report, out=stringio) assert not result assert "Could not read:" in stringio.getvalue() @@ -145,7 +146,7 @@ def test_lint_files_without_copyright_and_licensing(fake_repository, stringio): (fake_repository / "foo.py").write_text("foo") project = Project(fake_repository) report = 
ProjectReport.generate(project) - result = lint(report, out=stringio) + result = format_plain(report, out=stringio) assert not result assert ( From f64835684ead47cee7314e97e8398ef80388341d Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sun, 21 May 2023 15:29:14 +0200 Subject: [PATCH 36/60] Repair boolean check of SpdxInfo; object not iterable Signed-off-by: Carmen Bianca BAKKER --- src/reuse/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index 2f93556ec..abbbcadf8 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -185,7 +185,7 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: file_result = extract_spdx_info( decoded_text_from_binary(fp, size=read_limit) ) - if any(file_result): + if file_result: license_path = str(path) except (ExpressionError, ParseError): _LOGGER.error( From c75a3e36d4b51722f8afd71c82dc69d26eddcdea Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sun, 21 May 2023 15:36:15 +0200 Subject: [PATCH 37/60] Correctly use format_plain Signed-off-by: Carmen Bianca BAKKER --- tests/test_lint.py | 59 +++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index 0fb42713f..050dbf5dd 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -10,7 +10,6 @@ import pytest -# FIXME: Verify whether format_plain is the thing being tested. 
from reuse.lint import format_plain from reuse.project import Project from reuse.report import ProjectReport @@ -60,7 +59,7 @@ def test_lint_submodule_included(submodule_repository): (submodule_repository / "submodule/foo.c").write_text("foo") report = ProjectReport.generate(project) result = format_plain(report) - assert not result + assert ":-(" in result def test_lint_empty_directory(empty_directory): @@ -71,7 +70,7 @@ def test_lint_empty_directory(empty_directory): assert result -def test_lint_deprecated(fake_repository, stringio): +def test_lint_deprecated(fake_repository): """If a repo has a deprecated license, detect it.""" shutil.copy( fake_repository / "LICENSES/GPL-3.0-or-later.txt", @@ -83,77 +82,77 @@ def test_lint_deprecated(fake_repository, stringio): project = Project(fake_repository) report = ProjectReport.generate(project) - result = format_plain(report, out=stringio) + result = format_plain(report) - assert not result - assert "GPL-3.0" in stringio.getvalue() + assert ":-(" in result + assert "GPL-3.0" in result -def test_lint_bad_license(fake_repository, stringio): +def test_lint_bad_license(fake_repository): """A bad license is detected.""" (fake_repository / "foo.py").write_text( "SPDX-License-Identifier: bad-license" ) project = Project(fake_repository) report = ProjectReport.generate(project) - result = format_plain(report, out=stringio) + result = format_plain(report) - assert not result - assert "foo.py" in stringio.getvalue() - assert "bad-license" in stringio.getvalue() + assert ":-(" in result + assert "foo.py" in result + assert "bad-license" in result -def test_lint_missing_licenses(fake_repository, stringio): +def test_lint_missing_licenses(fake_repository): """A missing license is detected.""" (fake_repository / "foo.py").write_text("SPDX-License-Identifier: MIT") project = Project(fake_repository) report = ProjectReport.generate(project) - result = format_plain(report, out=stringio) + result = format_plain(report) - assert not 
result - assert "foo.py" in stringio.getvalue() - assert "MIT" in stringio.getvalue() + assert ":-(" in result + assert "foo.py" in result + assert "MIT" in result -def test_lint_unused_licenses(fake_repository, stringio): +def test_lint_unused_licenses(fake_repository): """An unused license is detected.""" (fake_repository / "LICENSES/MIT.txt").write_text("foo") project = Project(fake_repository) report = ProjectReport.generate(project) - result = format_plain(report, out=stringio) + result = format_plain(report) - assert not result - assert "Unused licenses: MIT" in stringio.getvalue() + assert ":-(" in result + assert "Unused licenses: MIT" in result @cpython @posix -def test_lint_read_errors(fake_repository, stringio): +def test_lint_read_errors(fake_repository): """A read error is detected.""" (fake_repository / "foo.py").write_text("foo") (fake_repository / "foo.py").chmod(0o000) project = Project(fake_repository) report = ProjectReport.generate(project) - result = format_plain(report, out=stringio) + result = format_plain(report) - assert not result - assert "Could not read:" in stringio.getvalue() - assert "foo.py" in stringio.getvalue() + assert ":-(" in result + assert "Could not read:" in result + assert "foo.py" in result -def test_lint_files_without_copyright_and_licensing(fake_repository, stringio): +def test_lint_files_without_copyright_and_licensing(fake_repository): """A file without copyright and licensing is detected.""" (fake_repository / "foo.py").write_text("foo") project = Project(fake_repository) report = ProjectReport.generate(project) - result = format_plain(report, out=stringio) + result = format_plain(report) - assert not result + assert ":-(" in result assert ( "The following files have no copyright and licensing information:" - in stringio.getvalue() + in result ) - assert "foo.py" in stringio.getvalue() + assert "foo.py" in result # REUSE-IgnoreEnd From 5bc2c58bc426efb363ba8fe4280fc75efaa7e009 Mon Sep 17 00:00:00 2001 From: Carmen 
Bianca BAKKER Date: Sun, 21 May 2023 15:41:28 +0200 Subject: [PATCH 38/60] fixup! Rename json_version to lint_version --- tests/test_main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index c2c6d1481..a0c081a5e 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -222,7 +222,7 @@ def test_lint_json(fake_repository, stringio): output = json.loads(stringio.getvalue()) assert result == 0 - assert output["json_version"] == "1.0" + assert output["lint_version"] == "1.0" assert len(output["files"]) == 7 @@ -233,7 +233,7 @@ def test_lint_json_fail(fake_repository, stringio): output = json.loads(stringio.getvalue()) assert result > 0 - assert output["json_version"] == "1.0" + assert output["lint_version"] == "1.0" assert len(output["non_compliant"]["missing_licensing_info"]) == 1 assert len(output["non_compliant"]["missing_copyright_info"]) == 1 assert len(output["files"]) == 8 From 54a8a66bad4063464909617982d1052866921479 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sun, 21 May 2023 15:44:30 +0200 Subject: [PATCH 39/60] fixup! fixup! 
Rename json_version to lint_version --- tests/test_main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index a0c081a5e..c4bf45d70 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -223,7 +223,7 @@ def test_lint_json(fake_repository, stringio): assert result == 0 assert output["lint_version"] == "1.0" - assert len(output["files"]) == 7 + assert len(output["files"]) == 8 def test_lint_json_fail(fake_repository, stringio): @@ -236,7 +236,7 @@ def test_lint_json_fail(fake_repository, stringio): assert output["lint_version"] == "1.0" assert len(output["non_compliant"]["missing_licensing_info"]) == 1 assert len(output["non_compliant"]["missing_copyright_info"]) == 1 - assert len(output["files"]) == 8 + assert len(output["files"]) == 9 def test_lint_no_file_extension(fake_repository, stringio): From d3208db186469dc7090681e6f4a061aaccd0f686 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sun, 21 May 2023 15:53:22 +0200 Subject: [PATCH 40/60] Don't pass string where (optional) set is expected Signed-off-by: Carmen Bianca BAKKER --- tests/test_header.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/test_header.py b/tests/test_header.py index 5eb3e80b1..c97d41443 100644 --- a/tests/test_header.py +++ b/tests/test_header.py @@ -24,7 +24,7 @@ def test_create_header_simple(): """Create a super simple header.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} ) expected = cleandoc( """ @@ -57,7 +57,7 @@ def test_create_header_simple_with_contributor(): def test_create_header_template_simple(template_simple): """Create a header with a simple template.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} ) expected = cleandoc( """ @@ -77,7 
+77,7 @@ def test_create_header_template_simple(template_simple): def test_create_header_template_no_spdx(template_no_spdx): """Create a header with a template that does not have all SPDX info.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} ) with pytest.raises(MissingSpdxInfo): @@ -87,7 +87,7 @@ def test_create_header_template_no_spdx(template_no_spdx): def test_create_header_template_commented(template_commented): """Create a header with an already-commented template.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} ) expected = cleandoc( """ @@ -113,7 +113,7 @@ def test_create_header_template_commented(template_commented): def test_create_header_already_contains_spdx(): """Create a new header from a header that already contains SPDX info.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} ) existing = cleandoc( """ @@ -138,7 +138,7 @@ def test_create_header_already_contains_spdx(): def test_create_header_existing_is_wrong(): """If the existing header contains errors, raise a CommentCreateError.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} ) existing = cleandoc( """ @@ -154,7 +154,7 @@ def test_create_header_existing_is_wrong(): def test_create_header_old_syntax(): """Old copyright syntax is preserved when creating a new header.""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) existing = cleandoc( """ # Copyright John Doe @@ -173,7 +173,7 @@ def test_create_header_old_syntax(): def test_create_header_remove_fluff(): """Any stuff that isn't SPDX info is removed when using create_header.""" - 
spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) existing = cleandoc( """ # SPDX-FileCopyrightText: John Doe @@ -199,7 +199,7 @@ def test_add_new_header_simple(): the old one. """ spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} ) text = cleandoc( """ @@ -229,7 +229,7 @@ def test_add_new_header_simple(): def test_find_and_replace_no_header(): """Given text without header, add a header.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} ) text = "pass" expected = cleandoc( @@ -251,7 +251,7 @@ def test_find_and_replace_no_header(): def test_find_and_replace_verbatim(): """Replace a header with itself.""" - spdx_info = SpdxInfo(set(), set(), "") + spdx_info = SpdxInfo(set(), set()) text = cleandoc( """ # SPDX-FileCopyrightText: Jane Doe @@ -270,7 +270,7 @@ def test_find_and_replace_newline_before_header(): preceding whitespace. """ spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} ) text = cleandoc( """ @@ -297,7 +297,7 @@ def test_find_and_replace_newline_before_header(): def test_find_and_replace_preserve_preceding(): """When the SPDX header is in the middle of the file, keep it there.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} ) text = cleandoc( """ @@ -335,7 +335,7 @@ def test_find_and_replace_keep_shebang(): it. 
""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} ) text = cleandoc( """ @@ -366,7 +366,7 @@ def test_find_and_replace_separate_shebang(): """When the shebang is part of the same comment as the SPDX comment, separate the two. """ - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) text = cleandoc( """ #!/usr/bin/env python3 @@ -394,7 +394,7 @@ def test_find_and_replace_separate_shebang(): def test_find_and_replace_only_shebang(): """When the file only contains a shebang, keep it at the top of the file.""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) text = cleandoc( """ #!/usr/bin/env python3 @@ -424,7 +424,7 @@ def test_find_and_replace_keep_old_comment(): licensing information, preserve it below the REUSE header. """ spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} ) text = cleandoc( """ @@ -451,7 +451,7 @@ def test_find_and_replace_keep_old_comment(): def test_find_and_replace_preserve_newline(): """If the file content ends with a newline, don't remove it.""" - spdx_info = SpdxInfo(set(), set(), "") + spdx_info = SpdxInfo(set(), set()) text = ( cleandoc( """ From 8a40fb681091221fa43f969da96bf67129dec85e Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sun, 21 May 2023 15:55:29 +0200 Subject: [PATCH 41/60] fixup! 
Don't pass string where (optional) set is expected --- src/reuse/project.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index abbbcadf8..f73d52f3a 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -156,8 +156,8 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: # This means that only one 'source' of licensing/copyright information # is captured in SpdxInfo - dep5_result = SpdxInfo(set(), set(), "") - file_result = SpdxInfo(set(), set(), "") + dep5_result = SpdxInfo(set(), set()) + file_result = SpdxInfo(set(), set()) # Search the .reuse/dep5 file for SPDX information. if self._copyright: From a769306ba3598583984b8beafe2331856f63c62b Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sun, 21 May 2023 16:03:59 +0200 Subject: [PATCH 42/60] fixup! fixup! Don't pass string where (optional) set is expected --- src/reuse/_util.py | 4 ++-- src/reuse/project.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/reuse/_util.py b/src/reuse/_util.py index bb60a9244..693d6615c 100644 --- a/src/reuse/_util.py +++ b/src/reuse/_util.py @@ -208,12 +208,12 @@ def _copyright_from_dep5(path: PathLike, dep5_copyright: Copyright) -> SpdxInfo: result = dep5_copyright.find_files_paragraph(Path(path).as_posix()) if result is None: - return SpdxInfo(set(), set(), str(path)) + return SpdxInfo(set(), set(), license_path=str(path)) return SpdxInfo( set(map(_LICENSING.parse, [result.license.synopsis])), set(map(str.strip, result.copyright.splitlines())), - str(path), + license_path=str(path), ) diff --git a/src/reuse/project.py b/src/reuse/project.py index f73d52f3a..08b79e645 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -201,7 +201,9 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: copyright_lines = dep5_result.copyright_lines.union( file_result.copyright_lines ) - return SpdxInfo(spdx_expressions, copyright_lines, license_path) + return SpdxInfo( + 
spdx_expressions, copyright_lines, license_path=license_path + ) def relative_from_root(self, path: Path) -> Path: """If the project root is /tmp/project, and *path* is From cc5a383b9ec77219867fb3b471ece0033998a9fa Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sun, 21 May 2023 16:13:37 +0200 Subject: [PATCH 43/60] Create SpdxInfo.contains_copyright_or_licensing Signed-off-by: Carmen Bianca BAKKER --- src/reuse/__init__.py | 4 ++++ src/reuse/project.py | 4 ++-- tests/test_core.py | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 tests/test_core.py diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py index dd770baeb..46cd1fd2a 100644 --- a/src/reuse/__init__.py +++ b/src/reuse/__init__.py @@ -97,6 +97,10 @@ class SpdxInfo: contributor_lines: Set[str] = field(default_factory=set) license_path: Optional[str] = None + def contains_copyright_or_licensing(self) -> bool: + """Either *spdx_expressions* or *copyright_lines* is non-empty.""" + return bool(self.spdx_expressions or self.copyright_lines) + def __bool__(self): return any(self.__dict__.values()) diff --git a/src/reuse/project.py b/src/reuse/project.py index 08b79e645..6bf6cae1a 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -164,11 +164,11 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: dep5_result = _copyright_from_dep5( self.relative_from_root(path), self._copyright ) - if bool(dep5_result): + if dep5_result.contains_copyright_or_licensing(): _LOGGER.info( _("'{path}' covered by .reuse/dep5").format(path=path) ) - license_path = ".reuse/dep5" + license_path = dep5_result.license_path # Search the file for SPDX information. 
with path.open("rb") as fp: diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 000000000..2b926eb44 --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Tests for some core components.""" + +from reuse import SpdxInfo + +# REUSE-IgnoreStart + + +def test_spdx_info_contains_copyright_or_licensing(): + """If either spdx_expressions or copyright_lines is truthy, expect True.""" + arguments = [ + ({"GPL-3.0-or-later"}, set()), + (set(), "SPDX-FileCopyrightText: 2017 Jane Doe"), + ({"GPL-3.0-or-later"}, "SPDX-FileCopyrightText: 2017 Jane Doe"), + ] + for args in arguments: + info = SpdxInfo(*args) + assert info.contains_copyright_or_licensing() + + +def test_spdx_info_contains_copyright_or_licensing_empty(): + """If the SpdxInfo object is completely empty, expect False.""" + info = SpdxInfo() + assert not info.contains_copyright_or_licensing() + + +def test_spdx_info_contains_copyright_or_licensing_other_truthy(): + """If another attribute is truthy, still expect False.""" + info = SpdxInfo(contributor_lines={"SPDX-FileContributor: 2017 Jane Doe"}) + assert not info.contains_copyright_or_licensing() + + +# REUSE-IgnoreEnd From 23fa4ccad6e38a42a08e90ac7bd909544ff58222 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sun, 21 May 2023 16:24:01 +0200 Subject: [PATCH 44/60] Make lint version a global constant Signed-off-by: Carmen Bianca BAKKER --- src/reuse/report.py | 4 +++- tests/test_main.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/reuse/report.py b/src/reuse/report.py index 9704c2a38..357c0bc3d 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -24,6 +24,8 @@ _LOGGER = logging.getLogger(__name__) +LINT_VERSION = "1.0" + class _MultiprocessingContainer: """Container that remembers some data in order to generate a FileReport.""" @@ -87,7 +89,7 @@ def to_dict_lint(self): """ # Setup 
report data container data = { - "lint_version": "1.0", + "lint_version": LINT_VERSION, "reuse_version": __REUSE_version__, "non_compliant": { "missing_licenses": self.missing_licenses, diff --git a/tests/test_main.py b/tests/test_main.py index c4bf45d70..0faed6df5 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -25,6 +25,7 @@ from reuse import download from reuse._main import main from reuse._util import GIT_EXE, HG_EXE +from reuse.report import LINT_VERSION # REUSE-IgnoreStart @@ -222,7 +223,7 @@ def test_lint_json(fake_repository, stringio): output = json.loads(stringio.getvalue()) assert result == 0 - assert output["lint_version"] == "1.0" + assert output["lint_version"] == LINT_VERSION assert len(output["files"]) == 8 @@ -233,7 +234,7 @@ def test_lint_json_fail(fake_repository, stringio): output = json.loads(stringio.getvalue()) assert result > 0 - assert output["lint_version"] == "1.0" + assert output["lint_version"] == LINT_VERSION assert len(output["non_compliant"]["missing_licensing_info"]) == 1 assert len(output["non_compliant"]["missing_copyright_info"]) == 1 assert len(output["files"]) == 9 From 8fa808a401d711adb273207fb800f16ff4df4f3f Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sun, 21 May 2023 16:27:16 +0200 Subject: [PATCH 45/60] Add tool version to json output Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 4 ++-- src/reuse/report.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 9a5751d0d..d1a0a6652 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -206,12 +206,12 @@ def format_plain(report: ProjectReport) -> str: output += _( "Congratulations! 
Your project is compliant with version" " {} of the REUSE Specification :-)" - ).format(data["reuse_version"]) + ).format(data["reuse_spec_version"]) else: output += _( "Unfortunately, your project is not compliant with version " "{} of the REUSE Specification :-(" - ).format(data["reuse_version"]) + ).format(data["reuse_spec_version"]) output += "\n" return output diff --git a/src/reuse/report.py b/src/reuse/report.py index 357c0bc3d..6ef8a683e 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -90,7 +90,8 @@ def to_dict_lint(self): # Setup report data container data = { "lint_version": LINT_VERSION, - "reuse_version": __REUSE_version__, + "reuse_spec_version": __REUSE_version__, + "reuse_tool_version": __version__, "non_compliant": { "missing_licenses": self.missing_licenses, "unused_licenses": [str(file) for file in self.unused_licenses], From c94b5a0eb0adb94f71343cf37f7e8736cc3b791d Mon Sep 17 00:00:00 2001 From: linozen Date: Thu, 25 May 2023 14:44:04 +0200 Subject: [PATCH 46/60] Refactoring precedence for license path --- src/reuse/project.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index 6bf6cae1a..c1a86eef5 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -168,7 +168,7 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: _LOGGER.info( _("'{path}' covered by .reuse/dep5").format(path=path) ) - license_path = dep5_result.license_path + license_path = str(self.root / ".reuse/dep5") # Search the file for SPDX information. 
with path.open("rb") as fp: @@ -195,14 +195,25 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: ).format(path=path) ) - spdx_expressions = dep5_result.spdx_expressions.union( - file_result.spdx_expressions - ) - copyright_lines = dep5_result.copyright_lines.union( - file_result.copyright_lines - ) + # There is only a .dep5 file + if ( + dep5_result.contains_copyright_or_licensing() + and not file_result.contains_copyright_or_licensing() + ): + # Information in the file header takes precendence over .dep5 file + return SpdxInfo( + spdx_expressions=dep5_result.spdx_expressions, + copyright_lines=dep5_result.copyright_lines, + license_path=license_path, + ) + # TODO Emit warning that information in .dep5 file was ommitted + + # There is both information in a .dep5 file and in the file header + # or there is only a file header return SpdxInfo( - spdx_expressions, copyright_lines, license_path=license_path + spdx_expressions=file_result.spdx_expressions, + copyright_lines=file_result.copyright_lines, + license_path=license_path, ) def relative_from_root(self, path: Path) -> Path: From 7493ed1bb1edd7e52608cfd72b92ae36f7c686f9 Mon Sep 17 00:00:00 2001 From: linozen Date: Thu, 25 May 2023 14:48:00 +0200 Subject: [PATCH 47/60] Add two `--json` specific test to `tests/test_lint.py` --- tests/test_lint.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/test_lint.py b/tests/test_lint.py index 050dbf5dd..6439006b0 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -155,4 +155,68 @@ def test_lint_files_without_copyright_and_licensing(fake_repository): assert "foo.py" in result +def test_lint_json_output(fake_repository): + """Test for lint with JSON output.""" + (fake_repository / "foo.py").write_text("SPDX-License-Identifier: MIT") + project = Project(fake_repository) + report = ProjectReport.generate(project) + + json_result = report.to_dict_lint() + + assert json_result + # Test if all the keys are present 
+ assert "lint_version" in json_result + assert "reuse_spec_version" in json_result + assert "reuse_tool_version" in json_result + assert "non_compliant" in json_result + assert "files" in json_result + assert "summary" in json_result + # Test length of resulting list values + assert len(json_result["files"]) == 9 + assert len(json_result["summary"]) == 5 + # Test result + assert json_result["summary"]["compliant"] is False + # Test license path + for test_file in json_result["files"]: + if test_file["path"] == str(fake_repository / "foo.py"): + assert test_file["licenses"][0]["value"] == "MIT" + assert test_file["licenses"][0]["source"] == str( + fake_repository / "foo.py" + ) + if test_file["path"].startswith(str(fake_repository / "doc")): + assert test_file["licenses"][0]["value"] == "CC0-1.0" + assert test_file["licenses"][0]["source"] == str( + fake_repository / ".reuse/dep5" + ) + + +def test_lint_json_output_precedence(fake_repository): + """Test for lint with JSON output with focus on precedence.""" + (fake_repository / "doc/differently_licensed_docs.rst").write_text( + "SPDX-License-Identifier: MIT" + ) + project = Project(fake_repository) + report = ProjectReport.generate(project) + + json_result = report.to_dict_lint() + + assert json_result + # Test result + assert json_result["summary"]["compliant"] is False + # Test license path precedence + for test_file in json_result["files"]: + if test_file["path"].startswith( + str(fake_repository / "doc/differently_licensed_docs.rst") + ): + assert test_file["licenses"][0]["value"] == "MIT" + assert test_file["licenses"][0]["source"] == str( + fake_repository / "doc/differently_licensed_docs.rst" + ) + if test_file["path"].startswith(str(fake_repository / "doc/index.rst")): + assert test_file["licenses"][0]["value"] == "CC0-1.0" + assert test_file["licenses"][0]["source"] == str( + fake_repository / ".reuse/dep5" + ) + + # REUSE-IgnoreEnd From bb3217a68cd15aeb09c1c17564a14e0adc2ffac9 Mon Sep 17 00:00:00 2001 
From: linozen Date: Thu, 25 May 2023 15:14:56 +0200 Subject: [PATCH 48/60] Adapting two tests to new precedence behaviour --- tests/test_project.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 487c8a035..15cd65eec 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -247,22 +247,22 @@ def test_spdx_info_of_only_copyright(fake_repository): def test_spdx_info_of_only_copyright_also_covered_by_debian(fake_repository): """A file contains only a copyright line, but debian/copyright also has - information on this file. Use both. + information on this file. Use only the information from file header. """ (fake_repository / "doc/foo.py").write_text( "SPDX-FileCopyrightText: in file" ) project = Project(fake_repository) spdx_info = project.spdx_info_of("doc/foo.py") - assert any(spdx_info.spdx_expressions) - assert len(spdx_info.copyright_lines) == 2 + + assert len(spdx_info.copyright_lines) == 1 assert "SPDX-FileCopyrightText: in file" in spdx_info.copyright_lines - assert "2017 Jane Doe" in spdx_info.copyright_lines def test_spdx_info_of_also_covered_by_dep5(fake_repository): """A file contains all SPDX information, but .reuse/dep5 also - provides information on this file. Use both. + provides information on this file. Use only the information + from the file header. 
""" (fake_repository / "doc/foo.py").write_text( dedent( @@ -274,9 +274,9 @@ def test_spdx_info_of_also_covered_by_dep5(fake_repository): project = Project(fake_repository) spdx_info = project.spdx_info_of("doc/foo.py") assert LicenseSymbol("MIT") in spdx_info.spdx_expressions - assert LicenseSymbol("CC0-1.0") in spdx_info.spdx_expressions + assert LicenseSymbol("CC0-1.0") not in spdx_info.spdx_expressions assert "SPDX-FileCopyrightText: in file" in spdx_info.copyright_lines - assert "2017 Jane Doe" in spdx_info.copyright_lines + assert "2017 Jane Doe" not in spdx_info.copyright_lines def test_spdx_info_of_no_duplicates(empty_directory): From f2f1ef067a9fc7144eeaccaa6abd31adaff2f90e Mon Sep 17 00:00:00 2001 From: linozen Date: Thu, 25 May 2023 15:28:15 +0200 Subject: [PATCH 49/60] Adding a warning when information is overriden --- src/reuse/project.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index c1a86eef5..37eee586e 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -195,21 +195,31 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: ).format(path=path) ) - # There is only a .dep5 file + # There is both information in a .dep5 file and in the file header if ( + dep5_result.contains_copyright_or_licensing() + and file_result.contains_copyright_or_licensing() + ): + _LOGGER.warning( + _( + "Copyright and licensing information for '{path}' have been" + " found in both the file header or .license file and the" + " DEP5 file located at '{dep5_path}'. The information in" + " the DEP5 file has been overriden. Please ensure that this" + " is correct." 
+ ).format(path=path, dep5_path=".reuse/dep5") + ) + # There is only a .dep5 file + elif ( dep5_result.contains_copyright_or_licensing() and not file_result.contains_copyright_or_licensing() ): - # Information in the file header takes precendence over .dep5 file return SpdxInfo( spdx_expressions=dep5_result.spdx_expressions, copyright_lines=dep5_result.copyright_lines, license_path=license_path, ) - # TODO Emit warning that information in .dep5 file was ommitted - - # There is both information in a .dep5 file and in the file header - # or there is only a file header + # There is only a file header return SpdxInfo( spdx_expressions=file_result.spdx_expressions, copyright_lines=file_result.copyright_lines, From cd820e16848646070e542e67a006f47cc294c68f Mon Sep 17 00:00:00 2001 From: linozen Date: Thu, 25 May 2023 15:55:41 +0200 Subject: [PATCH 50/60] Import `Path` instead of `PosixPath` --- src/reuse/lint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index d1a0a6652..9af429388 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -12,7 +12,7 @@ import os import sys from gettext import gettext as _ -from pathlib import PosixPath +from pathlib import Path from .project import Project from .report import ProjectReport @@ -229,7 +229,7 @@ def custom_serializer(obj): :param obj: Object to be serialized """ - if isinstance(obj, PosixPath): + if isinstance(obj, Path): return str(obj) if isinstance(obj, set): return list(obj) From 6c5154515ea2a414606a92d7c1adddcc7ee383b2 Mon Sep 17 00:00:00 2001 From: linozen Date: Thu, 25 May 2023 16:29:38 +0200 Subject: [PATCH 51/60] Use `StringIO` object instead of normal string object --- src/reuse/lint.py | 125 ++++++++++++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 55 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 9af429388..f9681d64c 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -12,6 +12,7 @@ import os 
import sys from gettext import gettext as _ +from io import StringIO from pathlib import Path from .project import Project @@ -48,7 +49,7 @@ def format_plain(report: ProjectReport) -> str: :param report: ProjectReport data :return: String (in plaintext) that can be output to sys.stdout """ - output = "" + output = StringIO() data = report.to_dict_lint() # If the project is not compliant: @@ -56,59 +57,63 @@ def format_plain(report: ProjectReport) -> str: # Bad licenses bad_licenses = data["non_compliant"]["bad_licenses"] if bad_licenses: - output += "# " + _("BAD LICENSES") + "\n\n" + output.write("# " + _("BAD LICENSES") + "\n\n") for lic in sorted(bad_licenses.keys()): - output += _("'{}' found in:").format(lic) + "\n" - output += f"* {list(bad_licenses[lic])[0]}" + "\n\n" - output += "\n" + output.write(_("'{}' found in:").format(lic) + "\n") + output.write(f"* {list(bad_licenses[lic])[0]}" + "\n\n") + output.write("\n") # Deprecated licenses deprecated_licenses = data["non_compliant"]["deprecated_licenses"] if deprecated_licenses: - output += "# " + _("DEPRECATED LICENSES") + "\n\n" - output += _("The following licenses are deprecated by SPDX:") + "\n" + output.write("# " + _("DEPRECATED LICENSES") + "\n\n") + output.write( + _("The following licenses are deprecated by SPDX:") + "\n" + ) for lic in sorted(deprecated_licenses): - output += f"* {lic}\n" - output += "\n\n" + output.write(f"* {lic}\n") + output.write("\n\n") # Licenses without extension licenses_without_extension = data["non_compliant"][ "licenses_without_extension" ] if licenses_without_extension: - output += "# " + _("LICENSES WITHOUT FILE EXTENSION") + "\n\n" - output += _("The following licenses have no file extension:") + "\n" + output.write("# " + _("LICENSES WITHOUT FILE EXTENSION") + "\n\n") + output.write( + _("The following licenses have no file extension:") + "\n" + ) for lic in sorted(licenses_without_extension): - output += f"* {str(licenses_without_extension[lic])}" + "\n" - output += 
"\n\n" + output.write(f"* {str(licenses_without_extension[lic])}" + "\n") + output.write("\n\n") # Missing licenses missing_licenses = data["non_compliant"]["missing_licenses"] if missing_licenses: - output += "# " + _("MISSING LICENSES") + "\n\n" + output.write("# " + _("MISSING LICENSES") + "\n\n") for lic in sorted(missing_licenses.keys()): - output += _("'{}' found in:").format(lic) + "\n" + output.write(_("'{}' found in:").format(lic) + "\n") for file in sorted(missing_licenses[lic]): - output += f"* {file}\n" - output += "\n\n" + output.write(f"* {file}\n") + output.write("\n\n") # Unused licenses unused_licenses = data["non_compliant"]["unused_licenses"] if unused_licenses: - output += "# " + _("UNUSED LICENSES") + "\n\n" - output += _("The following licenses are not used:") + "\n" + output.write("# " + _("UNUSED LICENSES") + "\n\n") + output.write(_("The following licenses are not used:") + "\n") for lic in sorted(unused_licenses): - output += f"* {lic}\n" - output += "\n\n" + output.write(f"* {lic}\n") + output.write("\n\n") # Read errors read_errors = data["non_compliant"]["read_errors"] if read_errors: - output += "# " + _("READ ERRORS") + "\n\n" - output += _("Could not read:") + "\n" + output.write("# " + _("READ ERRORS") + "\n\n") + output.write(_("Could not read:") + "\n") for path in sorted(read_errors): - output += f"* {str(path)}" + "\n" - output += "\n\n" + output.write(f"* {str(path)}" + "\n") + output.write("\n\n") # Missing copyright and licensing information files_without_copyright = set( @@ -124,34 +129,40 @@ def format_plain(report: ProjectReport) -> str: header = ( "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n" ) - output += header + output.write(header) if files_without_both: - output += _( - "The following files have no copyright and licensing " - "information:" + output.write( + _( + "The following files have no copyright and licensing " + "information:" + ) ) - output += "\n" + output.write("\n") for file in 
sorted(files_without_both): - output += f"* {file}\n" - output += "\n" + output.write(f"* {file}\n") + output.write("\n") if files_without_copyright - files_without_both: - output += _("The following files have no copyright information:") - output += "\n" + output.write( + _("The following files have no copyright information:") + ) + output.write("\n") for file in sorted(files_without_copyright - files_without_both): - output += f"* {file}\n" - output += "\n" + output.write(f"* {file}\n") + output.write("\n") if files_without_license - files_without_both: - output += _("The following files have no licensing information:") - output += "\n" + output.write( + _("The following files have no licensing information:") + ) + output.write("\n") for file in sorted(files_without_license - files_without_both): - output += f"* {file}\n" - output += "\n" + output.write(f"* {file}\n") + output.write("\n") - output += "\n" - output += "# " + _("SUMMARY") - output += "\n\n" + output.write("\n") + output.write("# " + _("SUMMARY")) + output.write("\n\n") summary_contents = [ (_("Bad licenses:"), ", ".join(data["non_compliant"]["bad_licenses"])), @@ -199,22 +210,26 @@ def format_plain(report: ProjectReport) -> str: value = "0" if value == "empty": value = "" - output += "* " + key + " " + value + "\n" + output.write("* " + key + " " + value + "\n") - output += "\n" + output.write("\n") if data["summary"]["compliant"]: - output += _( - "Congratulations! Your project is compliant with version" - " {} of the REUSE Specification :-)" - ).format(data["reuse_spec_version"]) + output.write( + _( + "Congratulations! 
Your project is compliant with version" + " {} of the REUSE Specification :-)" + ).format(data["reuse_spec_version"]) + ) else: - output += _( - "Unfortunately, your project is not compliant with version " - "{} of the REUSE Specification :-(" - ).format(data["reuse_spec_version"]) - output += "\n" + output.write( + _( + "Unfortunately, your project is not compliant with version " + "{} of the REUSE Specification :-(" + ).format(data["reuse_spec_version"]) + ) + output.write("\n") - return output + return output.getvalue() def format_json(report: ProjectReport) -> str: From d1c21469b570ccc54339990ca31f621c791ee046 Mon Sep 17 00:00:00 2001 From: linozen Date: Thu, 25 May 2023 16:51:28 +0200 Subject: [PATCH 52/60] Make sure that some keys are on top of sorted output --- src/reuse/lint.py | 16 ++++++++++++++-- test | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) create mode 160000 test diff --git a/src/reuse/lint.py b/src/reuse/lint.py index f9681d64c..09b4a5b3d 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -252,12 +252,24 @@ def custom_serializer(obj): f"Object of type {obj.__class__.__name__} is not JSON serializable" ) + def custom_sort_key(key): + order = { + "lint_version": 0, + "reuse_spec_version": 1, + "reuse_tool_version": 2, + } + return (order.get(key, 3), key) + + report_dict = report.to_dict_lint() + sorted_report_dict = dict( + sorted(report_dict.items(), key=lambda item: custom_sort_key(item[0])) + ) + return json.dumps( - report.to_dict_lint(), + sorted_report_dict, indent=2, # Serialize sets to lists default=custom_serializer, - sort_keys=True, ) diff --git a/test b/test new file mode 160000 index 000000000..2703fff26 --- /dev/null +++ b/test @@ -0,0 +1 @@ +Subproject commit 2703fff268f581fc01293e275d16c8a210a5965c From c8baa6ec630fa97c963fada0cc4c6e2a7f626aba Mon Sep 17 00:00:00 2001 From: linozen Date: Thu, 25 May 2023 16:57:42 +0200 Subject: [PATCH 53/60] Make `summary_contents` a dictionary --- src/reuse/lint.py | 74 
++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 43 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 09b4a5b3d..ff0b7011f 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -164,53 +164,41 @@ def format_plain(report: ProjectReport) -> str: output.write("# " + _("SUMMARY")) output.write("\n\n") - summary_contents = [ - (_("Bad licenses:"), ", ".join(data["non_compliant"]["bad_licenses"])), - ( - _("Deprecated licenses:"), - ", ".join(data["non_compliant"]["deprecated_licenses"]), + summary_contents = { + _("Bad licenses:"): ", ".join(data["non_compliant"]["bad_licenses"]), + _("Deprecated licenses:"): ", ".join( + data["non_compliant"]["deprecated_licenses"] ), - ( - _("Licenses without file extension:"), - ", ".join(data["non_compliant"]["licenses_without_extension"]), + _("Licenses without file extension:"): ", ".join( + data["non_compliant"]["licenses_without_extension"] ), - ( - _("Missing licenses:"), - ", ".join(data["non_compliant"]["missing_licenses"]), + _("Missing licenses:"): ", ".join( + data["non_compliant"]["missing_licenses"] ), - ( - _("Unused licenses:"), - ", ".join(data["non_compliant"]["unused_licenses"]), + _("Unused licenses:"): ", ".join( + data["non_compliant"]["unused_licenses"] ), - (_("Used licenses:"), ", ".join(data["summary"]["used_licenses"])), - ( - _("Read errors: {count}").format( - count=len(data["non_compliant"]["read_errors"]) - ), - "empty", - ), - ( - _("files with copyright information: {count} / {total}").format( - count=data["summary"]["files_with_copyright_info"], - total=data["summary"]["files_total"], - ), - "empty", - ), - ( - _("files with license information: {count} / {total}").format( - count=data["summary"]["files_with_licensing_info"], - total=data["summary"]["files_total"], - ), - "empty", - ), - ] - - for key, value in summary_contents: - if not value: - value = "0" - if value == "empty": - value = "" - output.write("* " + key + " " + value + "\n") + 
_("Used licenses:"): ", ".join(data["summary"]["used_licenses"]), + _("Read errors: {count}").format( + count=len(data["non_compliant"]["read_errors"]) + ): "empty", + _("files with copyright information: {count} / {total}").format( + count=data["summary"]["files_with_copyright_info"], + total=data["summary"]["files_total"], + ): "empty", + _("files with license information: {count} / {total}").format( + count=data["summary"]["files_with_licensing_info"], + total=data["summary"]["files_total"], + ): "empty", + } + + filtered_summary_contents = { + key: (value if value not in ("", "empty") else "0" if not value else "") + for key, value in summary_contents.items() + } + + for key, value in filtered_summary_contents.items(): + output.write(f"* {key} {value}\n") output.write("\n") if data["summary"]["compliant"]: From 983b0c4158b4185c9d25f1a0af09aed9c3326f10 Mon Sep 17 00:00:00 2001 From: linozen Date: Fri, 26 May 2023 17:02:51 +0200 Subject: [PATCH 54/60] Simplify `lint --quiet` --- src/reuse/lint.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index ff0b7011f..29a7268c2 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -7,9 +7,7 @@ the reports and printing some conclusions. 
""" -import contextlib import json -import os import sys from gettext import gettext as _ from io import StringIO @@ -21,10 +19,10 @@ def add_arguments(parser): """Add arguments to parser.""" - parser.add_argument( + mutex_group = parser.add_mutually_exclusive_group() + mutex_group.add_argument( "-q", "--quiet", action="store_true", help=_("prevents output") ) - mutex_group = parser.add_mutually_exclusive_group() mutex_group.add_argument( "-j", "--json", action="store_true", help=_("formats output as JSON") ) @@ -37,7 +35,7 @@ def add_arguments(parser): mutex_group.add_argument( "--format", nargs="?", - choices=("json", "plain"), + choices=("json", "plain", "quiet"), help=_("formats output using the chosen formatter"), ) @@ -261,23 +259,17 @@ def custom_sort_key(key): ) -def run(args, project: Project, out=sys.stdout, formatter=format_plain): +def run(args, project: Project, out=sys.stdout): """List all non-compliant files.""" report = ProjectReport.generate( project, do_checksum=False, multiprocessing=not args.no_multiprocessing ) - with contextlib.ExitStack() as stack: - if args.quiet: - out = stack.enter_context(open(os.devnull, "w", encoding="utf-8")) - - if args.json or args.format == "json": - formatter = format_json - elif args.plain or args.format == "plain": - formatter = format_plain - else: - formatter = format_plain - - out.write(formatter(report)) + if args.quiet or args.format == "quiet": + pass + elif args.json or args.format == "json": + out.write(format_json(report)) + else: + out.write(format_plain(report)) return 0 if report.is_compliant else 1 From d7af2e282dfaea9a361a0e19cb1b3d78d2296ea2 Mon Sep 17 00:00:00 2001 From: linozen Date: Fri, 26 May 2023 17:20:44 +0200 Subject: [PATCH 55/60] Move dictionary sorting and test it --- src/reuse/lint.py | 15 +-------------- src/reuse/report.py | 27 +++++++++++++++++++++++---- tests/test_report.py | 15 ++++++++++++--- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/src/reuse/lint.py 
b/src/reuse/lint.py index 29a7268c2..25eb90220 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -238,21 +238,8 @@ def custom_serializer(obj): f"Object of type {obj.__class__.__name__} is not JSON serializable" ) - def custom_sort_key(key): - order = { - "lint_version": 0, - "reuse_spec_version": 1, - "reuse_tool_version": 2, - } - return (order.get(key, 3), key) - - report_dict = report.to_dict_lint() - sorted_report_dict = dict( - sorted(report_dict.items(), key=lambda item: custom_sort_key(item[0])) - ) - return json.dumps( - sorted_report_dict, + report.to_dict_lint(), indent=2, # Serialize sets to lists default=custom_serializer, diff --git a/src/reuse/report.py b/src/reuse/report.py index 6ef8a683e..d74c2df9b 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -89,9 +89,6 @@ def to_dict_lint(self): """ # Setup report data container data = { - "lint_version": LINT_VERSION, - "reuse_spec_version": __REUSE_version__, - "reuse_tool_version": __version__, "non_compliant": { "missing_licenses": self.missing_licenses, "unused_licenses": [str(file) for file in self.unused_licenses], @@ -129,7 +126,29 @@ def to_dict_lint(self): - len(self.files_without_licenses), "compliant": self.is_compliant, } - return data + + # Add the top three keys + unsorted_data = { + "lint_version": LINT_VERSION, + "reuse_spec_version": __REUSE_version__, + "reuse_tool_version": __version__, + **data, + } + + # Sort dictionary keys while keeping the top three keys at the beginning + sorted_keys = sorted(list(unsorted_data.keys())) + sorted_keys.remove("lint_version") + sorted_keys.remove("reuse_spec_version") + sorted_keys.remove("reuse_tool_version") + sorted_keys = [ + "lint_version", + "reuse_spec_version", + "reuse_tool_version", + ] + sorted_keys + + sorted_data = {key: unsorted_data[key] for key in sorted_keys} + + return sorted_data def bill_of_materials( self, diff --git a/tests/test_report.py b/tests/test_report.py index 5a5c77c49..4bc2a23f0 100644 --- 
a/tests/test_report.py +++ b/tests/test_report.py @@ -379,11 +379,20 @@ def test_generate_project_report_read_error(fake_repository, multiprocessing): def test_generate_project_report_to_dict_lint(fake_repository, multiprocessing): - """Extremely simple test for ProjectReport.to_dict.""" + """Generate dictionary output and verify correct ordering.""" project = Project(fake_repository) report = ProjectReport.generate(project, multiprocessing=multiprocessing) - # TODO: Actually do something - report.to_dict_lint() + result = report.to_dict_lint() + + # Check if the top three keys are at the beginning of the dictionary + assert list(result.keys())[:3] == [ + "lint_version", + "reuse_spec_version", + "reuse_tool_version", + ] + + # Check if the rest of the keys are sorted alphabetically + assert list(result.keys())[3:] == sorted(list(result.keys())[3:]) def test_bill_of_materials(fake_repository, multiprocessing): From 33456cbeb1aeb2daf5f4b85f6fc349cf2486ff80 Mon Sep 17 00:00:00 2001 From: linozen Date: Fri, 26 May 2023 17:25:42 +0200 Subject: [PATCH 56/60] Rename `SpdxInfo` to `ReuseInfo` --- src/reuse/__init__.py | 4 +- src/reuse/_util.py | 14 +++--- src/reuse/header.py | 16 +++---- src/reuse/project.py | 12 ++--- src/reuse/report.py | 4 +- tests/test_core.py | 8 ++-- tests/test_header.py | 106 ++++++++++++++++-------------------------- tests/test_util.py | 2 +- 8 files changed, 71 insertions(+), 95 deletions(-) diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py index 46cd1fd2a..fe8e70f7d 100644 --- a/src/reuse/__init__.py +++ b/src/reuse/__init__.py @@ -89,8 +89,8 @@ @dataclass(frozen=True) -class SpdxInfo: - """Simple class holding SPDX information""" +class ReuseInfo: + """Simple dataclass holding licensing and copyright information""" spdx_expressions: Set[Expression] = field(default_factory=set) copyright_lines: Set[str] = field(default_factory=set) diff --git a/src/reuse/_util.py b/src/reuse/_util.py index 693d6615c..22bf92c67 100644 --- 
a/src/reuse/_util.py +++ b/src/reuse/_util.py @@ -31,7 +31,7 @@ from debian.copyright import Copyright from license_expression import ExpressionError, Licensing -from . import SpdxInfo +from . import ReuseInfo from ._licenses import ALL_NON_DEPRECATED_MAP from .comment import _all_style_classes @@ -203,14 +203,16 @@ def _determine_license_suffix_path(path: PathLike) -> Path: return Path(f"{path}.license") -def _copyright_from_dep5(path: PathLike, dep5_copyright: Copyright) -> SpdxInfo: +def _copyright_from_dep5( + path: PathLike, dep5_copyright: Copyright +) -> ReuseInfo: """Find the reuse information of *path* in the dep5 Copyright object.""" result = dep5_copyright.find_files_paragraph(Path(path).as_posix()) if result is None: - return SpdxInfo(set(), set(), license_path=str(path)) + return ReuseInfo(set(), set(), license_path=str(path)) - return SpdxInfo( + return ReuseInfo( set(map(_LICENSING.parse, [result.license.synopsis])), set(map(str.strip, result.copyright.splitlines())), license_path=str(path), @@ -290,7 +292,7 @@ def merge_copyright_lines(copyright_lines: Set[str]) -> Set[str]: return copyright_out -def extract_spdx_info(text: str) -> SpdxInfo: +def extract_spdx_info(text: str) -> ReuseInfo: """Extract SPDX information from comments in a string. :raises ExpressionError: if an SPDX expression could not be parsed @@ -317,7 +319,7 @@ def extract_spdx_info(text: str) -> SpdxInfo: copyright_matches.add(match.groupdict()["copyright"].strip()) break - return SpdxInfo(expressions, copyright_matches, "") + return ReuseInfo(expressions, copyright_matches, "") def find_license_identifiers(text: str) -> Iterator[str]: diff --git a/src/reuse/header.py b/src/reuse/header.py index 8454c283d..bef038d6b 100644 --- a/src/reuse/header.py +++ b/src/reuse/header.py @@ -33,7 +33,7 @@ from jinja2.exceptions import TemplateNotFound from license_expression import ExpressionError -from . import SpdxInfo +from . 
import ReuseInfo from ._util import ( _COPYRIGHT_STYLES, PathType, @@ -81,7 +81,7 @@ class MissingSpdxInfo(Exception): # TODO: Add a template here maybe. def _create_new_header( - spdx_info: SpdxInfo, + spdx_info: ReuseInfo, template: Template = None, template_is_commented: bool = False, style: CommentStyle = None, @@ -131,7 +131,7 @@ def _create_new_header( # pylint: disable=too-many-arguments def create_header( - spdx_info: SpdxInfo, + spdx_info: ReuseInfo, header: str = None, template: Template = None, template_is_commented: bool = False, @@ -174,7 +174,7 @@ def create_header( ) # TODO: This behaviour does not match the docstring. - spdx_info = SpdxInfo( + spdx_info = ReuseInfo( spdx_info.spdx_expressions.union(existing_spdx.spdx_expressions), spdx_copyrights, spdx_info.contributor_lines.union(existing_spdx.contributor_lines), @@ -250,7 +250,7 @@ def _extract_shebang(prefix: str, text: str) -> Tuple[str, str]: # pylint: disable=too-many-arguments def find_and_replace_header( text: str, - spdx_info: SpdxInfo, + spdx_info: ReuseInfo, template: Template = None, template_is_commented: bool = False, style: CommentStyle = None, @@ -327,7 +327,7 @@ def find_and_replace_header( # pylint: disable=too-many-arguments def add_new_header( text: str, - spdx_info: SpdxInfo, + spdx_info: ReuseInfo, template: Template = None, template_is_commented: bool = False, style: CommentStyle = None, @@ -464,7 +464,7 @@ def _find_template(project: Project, name: str) -> Template: def _add_header_to_file( path: PathLike, - spdx_info: SpdxInfo, + spdx_info: ReuseInfo, template: Template, template_is_commented: bool, style: Optional[str], @@ -785,7 +785,7 @@ def run(args, project: Project, out=sys.stdout) -> int: set(args.contributor) if args.contributor is not None else set() ) - spdx_info = SpdxInfo(expressions, copyright_lines, contributors, "") + spdx_info = ReuseInfo(expressions, copyright_lines, contributors, "") result = 0 for path in paths: diff --git a/src/reuse/project.py 
b/src/reuse/project.py index 37eee586e..5fabcda19 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -24,7 +24,7 @@ _IGNORE_FILE_PATTERNS, _IGNORE_MESON_PARENT_DIR_PATTERNS, IdentifierNotFound, - SpdxInfo, + ReuseInfo, ) from ._licenses import EXCEPTION_MAP, LICENSE_MAP from ._util import ( @@ -140,7 +140,7 @@ def all_files(self, directory: PathLike = None) -> Iterator[Path]: _LOGGER.debug("yielding '%s'", the_file) yield the_file - def spdx_info_of(self, path: PathLike) -> SpdxInfo: + def spdx_info_of(self, path: PathLike) -> ReuseInfo: """Return SPDX info of *path*. This function will return any SPDX information that it can find, both @@ -156,8 +156,8 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: # This means that only one 'source' of licensing/copyright information # is captured in SpdxInfo - dep5_result = SpdxInfo(set(), set()) - file_result = SpdxInfo(set(), set()) + dep5_result = ReuseInfo(set(), set()) + file_result = ReuseInfo(set(), set()) # Search the .reuse/dep5 file for SPDX information. if self._copyright: @@ -214,13 +214,13 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: dep5_result.contains_copyright_or_licensing() and not file_result.contains_copyright_or_licensing() ): - return SpdxInfo( + return ReuseInfo( spdx_expressions=dep5_result.spdx_expressions, copyright_lines=dep5_result.copyright_lines, license_path=license_path, ) # There is only a file header - return SpdxInfo( + return ReuseInfo( spdx_expressions=file_result.spdx_expressions, copyright_lines=file_result.copyright_lines, license_path=license_path, diff --git a/src/reuse/report.py b/src/reuse/report.py index d74c2df9b..6defb2e7e 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -20,7 +20,7 @@ from . 
import __REUSE_version__, __version__ from ._util import _LICENSING, _checksum -from .project import Project, SpdxInfo +from .project import Project, ReuseInfo _LOGGER = logging.getLogger(__name__) @@ -390,7 +390,7 @@ def __init__(self, name, spdx_id=None, chk_sum=None): self.licenses_in_file: List[str] = [] self.license_concluded: str = None self.copyright: str = None - self.info: SpdxInfo = None + self.info: ReuseInfo = None class FileReport: diff --git a/tests/test_core.py b/tests/test_core.py index 2b926eb44..85c80bf67 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4,7 +4,7 @@ """Tests for some core components.""" -from reuse import SpdxInfo +from reuse import ReuseInfo # REUSE-IgnoreStart @@ -17,19 +17,19 @@ def test_spdx_info_contains_copyright_or_licensing(): ({"GPL-3.0-or-later"}, "SPDX-FileCopyrightText: 2017 Jane Doe"), ] for args in arguments: - info = SpdxInfo(*args) + info = ReuseInfo(*args) assert info.contains_copyright_or_licensing() def test_spdx_info_contains_copyright_or_licensing_empty(): """If the SpdxInfo object is completely empty, expect False.""" - info = SpdxInfo() + info = ReuseInfo() assert not info.contains_copyright_or_licensing() def test_spdx_info_contains_copyright_or_licensing_other_truthy(): """If another attribute is truthy, still expect False.""" - info = SpdxInfo(contributor_lines={"SPDX-FileContributor: 2017 Jane Doe"}) + info = ReuseInfo(contributor_lines={"SPDX-FileContributor: 2017 Jane Doe"}) assert not info.contains_copyright_or_licensing() diff --git a/tests/test_header.py b/tests/test_header.py index c97d41443..0a1a492a9 100644 --- a/tests/test_header.py +++ b/tests/test_header.py @@ -9,7 +9,7 @@ import pytest -from reuse import SpdxInfo +from reuse import ReuseInfo from reuse.comment import CCommentStyle, CommentCreateError from reuse.header import ( MissingSpdxInfo, @@ -23,9 +23,7 @@ def test_create_header_simple(): """Create a super simple header.""" - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, 
{"SPDX-FileCopyrightText: Jane Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}) expected = cleandoc( """ # SPDX-FileCopyrightText: Jane Doe @@ -34,12 +32,12 @@ def test_create_header_simple(): """ ) - assert create_header(spdx_info).strip() == expected + assert create_header(info).strip() == expected def test_create_header_simple_with_contributor(): """Create a super simple header.""" - spdx_info = SpdxInfo( + info = ReuseInfo( {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, {"John Doe"} ) expected = cleandoc( @@ -51,14 +49,12 @@ def test_create_header_simple_with_contributor(): """ ) - assert create_header(spdx_info).strip() == expected + assert create_header(info).strip() == expected def test_create_header_template_simple(template_simple): """Create a header with a simple template.""" - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}) expected = cleandoc( """ # Hello, world! 
@@ -69,26 +65,20 @@ def test_create_header_template_simple(template_simple): """ ) - assert ( - create_header(spdx_info, template=template_simple).strip() == expected - ) + assert create_header(info, template=template_simple).strip() == expected def test_create_header_template_no_spdx(template_no_spdx): """Create a header with a template that does not have all SPDX info.""" - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}) with pytest.raises(MissingSpdxInfo): - create_header(spdx_info, template=template_no_spdx) + create_header(info, template=template_no_spdx) def test_create_header_template_commented(template_commented): """Create a header with an already-commented template.""" - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}) expected = cleandoc( """ # Hello, world! 
@@ -101,7 +91,7 @@ def test_create_header_template_commented(template_commented): assert ( create_header( - spdx_info, + info, template=template_commented, template_is_commented=True, style=CCommentStyle, @@ -112,9 +102,7 @@ def test_create_header_template_commented(template_commented): def test_create_header_already_contains_spdx(): """Create a new header from a header that already contains SPDX info.""" - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}) existing = cleandoc( """ # SPDX-FileCopyrightText: John Doe @@ -132,14 +120,12 @@ def test_create_header_already_contains_spdx(): """ ) - assert create_header(spdx_info, header=existing).strip() == expected + assert create_header(info, header=existing).strip() == expected def test_create_header_existing_is_wrong(): """If the existing header contains errors, raise a CommentCreateError.""" - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}) existing = cleandoc( """ # SPDX-FileCopyrightText: John Doe @@ -149,12 +135,12 @@ def test_create_header_existing_is_wrong(): ) with pytest.raises(CommentCreateError): - create_header(spdx_info, header=existing) + create_header(info, header=existing) def test_create_header_old_syntax(): """Old copyright syntax is preserved when creating a new header.""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) + info = ReuseInfo({"GPL-3.0-or-later"}, set()) existing = cleandoc( """ # Copyright John Doe @@ -168,12 +154,12 @@ def test_create_header_old_syntax(): """ ) - assert create_header(spdx_info, header=existing).strip() == expected + assert create_header(info, header=existing).strip() == expected def test_create_header_remove_fluff(): """Any stuff that isn't SPDX info is removed when using create_header.""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, 
set()) + info = ReuseInfo({"GPL-3.0-or-later"}, set()) existing = cleandoc( """ # SPDX-FileCopyrightText: John Doe @@ -191,16 +177,14 @@ def test_create_header_remove_fluff(): """ ) - assert create_header(spdx_info, header=existing).strip() == expected + assert create_header(info, header=existing).strip() == expected def test_add_new_header_simple(): """Given text that already contains a header, create a new one, and preserve the old one. """ - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}) text = cleandoc( """ # SPDX-FileCopyrightText: John Doe @@ -223,14 +207,12 @@ def test_add_new_header_simple(): pass """ ) - assert add_new_header(text, spdx_info) == expected + assert add_new_header(text, info) == expected def test_find_and_replace_no_header(): """Given text without header, add a header.""" - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}) text = "pass" expected = cleandoc( """ @@ -243,15 +225,15 @@ def test_find_and_replace_no_header(): ) assert ( - find_and_replace_header(text, spdx_info) - == add_new_header(text, spdx_info) + find_and_replace_header(text, info) + == add_new_header(text, info) == expected ) def test_find_and_replace_verbatim(): """Replace a header with itself.""" - spdx_info = SpdxInfo(set(), set()) + info = ReuseInfo(set(), set()) text = cleandoc( """ # SPDX-FileCopyrightText: Jane Doe @@ -262,16 +244,14 @@ def test_find_and_replace_verbatim(): """ ) - assert find_and_replace_header(text, spdx_info) == text + assert find_and_replace_header(text, info) == text def test_find_and_replace_newline_before_header(): """In a scenario where the header is preceded by whitespace, remove the preceding whitespace. 
""" - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}) text = cleandoc( """ # SPDX-FileCopyrightText: Jane Doe @@ -291,14 +271,12 @@ def test_find_and_replace_newline_before_header(): """ ) - assert find_and_replace_header(text, spdx_info) == expected + assert find_and_replace_header(text, info) == expected def test_find_and_replace_preserve_preceding(): """When the SPDX header is in the middle of the file, keep it there.""" - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}) text = cleandoc( """ # Hello, world! @@ -327,16 +305,14 @@ def foo(bar): """ ) - assert find_and_replace_header(text, spdx_info) == expected + assert find_and_replace_header(text, info) == expected def test_find_and_replace_keep_shebang(): """When encountering a shebang, keep it and put the REUSE header beneath it. """ - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}) text = cleandoc( """ #!/usr/bin/env python3 @@ -359,14 +335,14 @@ def test_find_and_replace_keep_shebang(): """ ) - assert find_and_replace_header(text, spdx_info) == expected + assert find_and_replace_header(text, info) == expected def test_find_and_replace_separate_shebang(): """When the shebang is part of the same comment as the SPDX comment, separate the two. 
""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) + info = ReuseInfo({"GPL-3.0-or-later"}, set()) text = cleandoc( """ #!/usr/bin/env python3 @@ -389,12 +365,12 @@ def test_find_and_replace_separate_shebang(): """ ) - assert find_and_replace_header(text, spdx_info) == expected + assert find_and_replace_header(text, info) == expected def test_find_and_replace_only_shebang(): """When the file only contains a shebang, keep it at the top of the file.""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) + info = ReuseInfo({"GPL-3.0-or-later"}, set()) text = cleandoc( """ #!/usr/bin/env python3 @@ -416,16 +392,14 @@ def test_find_and_replace_only_shebang(): """ ) - assert find_and_replace_header(text, spdx_info) == expected + assert find_and_replace_header(text, info) == expected def test_find_and_replace_keep_old_comment(): """When encountering a comment that does not contain copyright and licensing information, preserve it below the REUSE header. """ - spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} - ) + info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}) text = cleandoc( """ # Hello, world! @@ -445,13 +419,13 @@ def test_find_and_replace_keep_old_comment(): """ ) - assert find_and_replace_header(text, spdx_info) == expected + assert find_and_replace_header(text, info) == expected def test_find_and_replace_preserve_newline(): """If the file content ends with a newline, don't remove it.""" - spdx_info = SpdxInfo(set(), set()) + info = ReuseInfo(set(), set()) text = ( cleandoc( """ @@ -465,7 +439,7 @@ def test_find_and_replace_preserve_newline(): + "\n" ) - assert find_and_replace_header(text, spdx_info) == text + assert find_and_replace_header(text, info) == text # REUSE-IgnoreEnd diff --git a/tests/test_util.py b/tests/test_util.py index 49011e65d..45ba6e371 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -78,7 +78,7 @@ def test_extract_no_info(): object. 
""" result = _util.extract_spdx_info("") - assert result == _util.SpdxInfo(set(), set(), "") + assert result == _util.ReuseInfo(set(), set(), "") def test_extract_tab(): From 962521f3e1542a3f3e38fb0ae1756bafe2b5b88f Mon Sep 17 00:00:00 2001 From: linozen Date: Fri, 26 May 2023 17:31:47 +0200 Subject: [PATCH 57/60] Rename `ReuseInfo.license_path` to `ReuseInfo.source_path` --- src/reuse/__init__.py | 2 +- src/reuse/_util.py | 4 ++-- src/reuse/project.py | 10 +++++----- src/reuse/report.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py index fe8e70f7d..350ff3320 100644 --- a/src/reuse/__init__.py +++ b/src/reuse/__init__.py @@ -95,7 +95,7 @@ class ReuseInfo: spdx_expressions: Set[Expression] = field(default_factory=set) copyright_lines: Set[str] = field(default_factory=set) contributor_lines: Set[str] = field(default_factory=set) - license_path: Optional[str] = None + source_path: Optional[str] = None def contains_copyright_or_licensing(self) -> bool: """Either *spdx_expressions* or *copyright_lines* is non-empty.""" diff --git a/src/reuse/_util.py b/src/reuse/_util.py index 22bf92c67..2137f7ab5 100644 --- a/src/reuse/_util.py +++ b/src/reuse/_util.py @@ -210,12 +210,12 @@ def _copyright_from_dep5( result = dep5_copyright.find_files_paragraph(Path(path).as_posix()) if result is None: - return ReuseInfo(set(), set(), license_path=str(path)) + return ReuseInfo(set(), set(), source_path=str(path)) return ReuseInfo( set(map(_LICENSING.parse, [result.license.synopsis])), set(map(str.strip, result.copyright.splitlines())), - license_path=str(path), + source_path=str(path), ) diff --git a/src/reuse/project.py b/src/reuse/project.py index 5fabcda19..65098c2fe 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -152,7 +152,7 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo: path = _determine_license_path(path) _LOGGER.debug(f"searching '{path}' for SPDX information") - license_path = "" 
+ source_path = "" # This means that only one 'source' of licensing/copyright information # is captured in SpdxInfo @@ -168,7 +168,7 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo: _LOGGER.info( _("'{path}' covered by .reuse/dep5").format(path=path) ) - license_path = str(self.root / ".reuse/dep5") + source_path = str(self.root / ".reuse/dep5") # Search the file for SPDX information. with path.open("rb") as fp: @@ -186,7 +186,7 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo: decoded_text_from_binary(fp, size=read_limit) ) if file_result: - license_path = str(path) + source_path = str(path) except (ExpressionError, ParseError): _LOGGER.error( _( @@ -217,13 +217,13 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo: return ReuseInfo( spdx_expressions=dep5_result.spdx_expressions, copyright_lines=dep5_result.copyright_lines, - license_path=license_path, + source_path=source_path, ) # There is only a file header return ReuseInfo( spdx_expressions=file_result.spdx_expressions, copyright_lines=file_result.copyright_lines, - license_path=license_path, + source_path=source_path, ) def relative_from_root(self, path: Path) -> Path: diff --git a/src/reuse/report.py b/src/reuse/report.py index 6defb2e7e..f4d236715 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -416,13 +416,13 @@ def to_dict_lint(self): "path": str(Path(self.path).resolve()), # TODO: Why does every copyright line have the same source? "copyrights": [ - {"value": copyright_, "source": self.spdxfile.info.license_path} + {"value": copyright_, "source": self.spdxfile.info.source_path} for copyright_ in self.spdxfile.copyright.split("\n") if copyright_ ], # TODO: Why does every license expression have the same source? 
"licenses": [ - {"value": license_, "source": self.spdxfile.info.license_path} + {"value": license_, "source": self.spdxfile.info.source_path} for license_ in self.spdxfile.licenses_in_file if license_ ], From 852b3fbc0e4090518fe912ae119e0d21bff958d0 Mon Sep 17 00:00:00 2001 From: linozen Date: Fri, 26 May 2023 18:27:55 +0200 Subject: [PATCH 58/60] Also encode `source_type` in `ReuseInfo` --- src/reuse/__init__.py | 1 + src/reuse/project.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py index 350ff3320..2da988173 100644 --- a/src/reuse/__init__.py +++ b/src/reuse/__init__.py @@ -96,6 +96,7 @@ class ReuseInfo: copyright_lines: Set[str] = field(default_factory=set) contributor_lines: Set[str] = field(default_factory=set) source_path: Optional[str] = None + source_type: Optional[str] = None def contains_copyright_or_licensing(self) -> bool: """Either *spdx_expressions* or *copyright_lines* is non-empty.""" diff --git a/src/reuse/project.py b/src/reuse/project.py index 65098c2fe..00b68a340 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -150,9 +150,10 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo: information, where 'primary' means '.license file' > 'header' > 'dep5' """ path = _determine_license_path(path) - _LOGGER.debug(f"searching '{path}' for SPDX information") - source_path = "" + source_type = "" + + _LOGGER.debug(f"searching '{path}' for SPDX information") # This means that only one 'source' of licensing/copyright information # is captured in SpdxInfo @@ -187,6 +188,11 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo: ) if file_result: source_path = str(path) + if path.suffix == ".license": + source_type = ".license file" + else: + source_type = "file header" + except (ExpressionError, ParseError): _LOGGER.error( _( @@ -209,7 +215,7 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo: " is correct." 
).format(path=path, dep5_path=".reuse/dep5") ) - # There is only a .dep5 file + # Information is only found in a DEP5 file elif ( dep5_result.contains_copyright_or_licensing() and not file_result.contains_copyright_or_licensing() @@ -218,12 +224,14 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo: spdx_expressions=dep5_result.spdx_expressions, copyright_lines=dep5_result.copyright_lines, source_path=source_path, + source_type="DEP5 file", ) - # There is only a file header + # There is a file header or a .license file return ReuseInfo( spdx_expressions=file_result.spdx_expressions, copyright_lines=file_result.copyright_lines, source_path=source_path, + source_type=source_type, ) def relative_from_root(self, path: Path) -> Path: From 322dd8a1cc3167690993d7b6a7f89c8a7314a1cf Mon Sep 17 00:00:00 2001 From: linozen Date: Fri, 26 May 2023 18:32:53 +0200 Subject: [PATCH 59/60] Rename `spdx_info_of` to `reuse_info_of` --- src/reuse/project.py | 2 +- src/reuse/report.py | 14 +++++---- tests/test_project.py | 70 +++++++++++++++++++++---------------------- 3 files changed, 44 insertions(+), 42 deletions(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index 00b68a340..7f6506863 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -140,7 +140,7 @@ def all_files(self, directory: PathLike = None) -> Iterator[Path]: _LOGGER.debug("yielding '%s'", the_file) yield the_file - def spdx_info_of(self, path: PathLike) -> ReuseInfo: + def reuse_info_of(self, path: PathLike) -> ReuseInfo: """Return SPDX info of *path*. 
This function will return any SPDX information that it can find, both diff --git a/src/reuse/report.py b/src/reuse/report.py index f4d236715..1303f9748 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -457,8 +457,8 @@ def generate( spdx_id.update(report.spdxfile.chk_sum.encode("utf-8")) report.spdxfile.spdx_id = f"SPDXRef-{spdx_id.hexdigest()}" - spdx_info = project.spdx_info_of(path) - for expression in spdx_info.spdx_expressions: + reuse_info = project.reuse_info_of(path) + for expression in reuse_info.spdx_expressions: for identifier in _LICENSING.license_keys(expression): # A license expression akin to Apache-1.0+ should register # correctly if LICENSES/Apache-1.0.txt exists. @@ -477,7 +477,7 @@ def generate( if not add_license_concluded: report.spdxfile.license_concluded = "NOASSERTION" - elif not spdx_info.spdx_expressions: + elif not reuse_info.spdx_expressions: report.spdxfile.license_concluded = "NONE" else: # Merge all the license expressions together, wrapping them in @@ -488,7 +488,7 @@ def generate( _LICENSING.parse( " AND ".join( f"({expression})" - for expression in spdx_info.spdx_expressions + for expression in reuse_info.spdx_expressions ), ) .simplify() @@ -496,9 +496,11 @@ def generate( ) # Copyright text - report.spdxfile.copyright = "\n".join(sorted(spdx_info.copyright_lines)) + report.spdxfile.copyright = "\n".join( + sorted(reuse_info.copyright_lines) + ) # Source of licensing and copyright info - report.spdxfile.info = spdx_info + report.spdxfile.info = reuse_info return report def __hash__(self): diff --git a/tests/test_project.py b/tests/test_project.py index 15cd65eec..20aa80cbf 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -201,34 +201,34 @@ def test_all_files_hg_ignored_contains_newline(hg_repository): assert Path("hello\nworld.pyc").absolute() not in project.all_files() -def test_spdx_info_of_file_does_not_exist(fake_repository): +def test_reuse_info_of_file_does_not_exist(fake_repository): """Raise 
FileNotFoundError when asking for the SPDX info of a file that does not exist. """ project = Project(fake_repository) with pytest.raises(FileNotFoundError): - project.spdx_info_of(fake_repository / "does_not_exist") + project.reuse_info_of(fake_repository / "does_not_exist") -def test_spdx_info_of_directory(empty_directory): - """Raise IsADirectoryError when calling spdx_info_of on a directory.""" +def test_reuse_info_of_directory(empty_directory): + """Raise IsADirectoryError when calling reuse_info_of on a directory.""" (empty_directory / "src").mkdir() project = Project(empty_directory) with pytest.raises((IsADirectoryError, PermissionError)): - project.spdx_info_of(empty_directory / "src") + project.reuse_info_of(empty_directory / "src") -def test_spdx_info_of_unlicensed_file(fake_repository): +def test_reuse_info_of_unlicensed_file(fake_repository): """Return an empty SpdxInfo object when asking for the SPDX information of a file that has no SPDX information. """ (fake_repository / "foo.py").write_text("foo") project = Project(fake_repository) - assert not bool(project.spdx_info_of("foo.py")) + assert not bool(project.reuse_info_of("foo.py")) -def test_spdx_info_of_only_copyright(fake_repository): +def test_reuse_info_of_only_copyright(fake_repository): """A file contains only a copyright line. Test whether it correctly picks up on that. 
""" @@ -236,16 +236,16 @@ def test_spdx_info_of_only_copyright(fake_repository): "SPDX-FileCopyrightText: 2017 Jane Doe" ) project = Project(fake_repository) - spdx_info = project.spdx_info_of("foo.py") - assert not any(spdx_info.spdx_expressions) - assert len(spdx_info.copyright_lines) == 1 + reuse_info = project.reuse_info_of("foo.py") + assert not any(reuse_info.spdx_expressions) + assert len(reuse_info.copyright_lines) == 1 assert ( - spdx_info.copyright_lines.pop() + reuse_info.copyright_lines.pop() == "SPDX-FileCopyrightText: 2017 Jane Doe" ) -def test_spdx_info_of_only_copyright_also_covered_by_debian(fake_repository): +def test_reuse_info_of_only_copyright_also_covered_by_debian(fake_repository): """A file contains only a copyright line, but debian/copyright also has information on this file. Use only the information from file header. """ @@ -253,13 +253,13 @@ def test_spdx_info_of_only_copyright_also_covered_by_debian(fake_repository): "SPDX-FileCopyrightText: in file" ) project = Project(fake_repository) - spdx_info = project.spdx_info_of("doc/foo.py") + reuse_info = project.reuse_info_of("doc/foo.py") - assert len(spdx_info.copyright_lines) == 1 - assert "SPDX-FileCopyrightText: in file" in spdx_info.copyright_lines + assert len(reuse_info.copyright_lines) == 1 + assert "SPDX-FileCopyrightText: in file" in reuse_info.copyright_lines -def test_spdx_info_of_also_covered_by_dep5(fake_repository): +def test_reuse_info_of_also_covered_by_dep5(fake_repository): """A file contains all SPDX information, but .reuse/dep5 also provides information on this file. Use only the information from the file header. 
@@ -272,14 +272,14 @@ def test_spdx_info_of_also_covered_by_dep5(fake_repository): ) ) project = Project(fake_repository) - spdx_info = project.spdx_info_of("doc/foo.py") - assert LicenseSymbol("MIT") in spdx_info.spdx_expressions - assert LicenseSymbol("CC0-1.0") not in spdx_info.spdx_expressions - assert "SPDX-FileCopyrightText: in file" in spdx_info.copyright_lines - assert "2017 Jane Doe" not in spdx_info.copyright_lines + reuse_info = project.reuse_info_of("doc/foo.py") + assert LicenseSymbol("MIT") in reuse_info.spdx_expressions + assert LicenseSymbol("CC0-1.0") not in reuse_info.spdx_expressions + assert "SPDX-FileCopyrightText: in file" in reuse_info.copyright_lines + assert "2017 Jane Doe" not in reuse_info.copyright_lines -def test_spdx_info_of_no_duplicates(empty_directory): +def test_reuse_info_of_no_duplicates(empty_directory): """A file contains the same lines twice. The SpdxInfo only contains those lines once. """ @@ -291,25 +291,25 @@ def test_spdx_info_of_no_duplicates(empty_directory): (empty_directory / "foo.py").write_text(text * 2) project = Project(empty_directory) - spdx_info = project.spdx_info_of("foo.py") - assert len(spdx_info.spdx_expressions) == 1 - assert LicenseSymbol("GPL-3.0-or-later") in spdx_info.spdx_expressions - assert len(spdx_info.copyright_lines) == 1 + reuse_info = project.reuse_info_of("foo.py") + assert len(reuse_info.spdx_expressions) == 1 + assert LicenseSymbol("GPL-3.0-or-later") in reuse_info.spdx_expressions + assert len(reuse_info.copyright_lines) == 1 assert ( "SPDX-FileCopyrightText: 2017 Free Software Foundation Europe" - in spdx_info.copyright_lines + in reuse_info.copyright_lines ) -def test_spdx_info_of_binary_succeeds(fake_repository): - """spdx_info_of succeeds when the target is covered by dep5.""" +def test_reuse_info_of_binary_succeeds(fake_repository): + """reuse_info_of succeeds when the target is covered by dep5.""" shutil.copy( RESOURCES_DIRECTORY / "fsfe.png", fake_repository / "doc/fsfe.png" ) 
project = Project(fake_repository) - spdx_info = project.spdx_info_of("doc/fsfe.png") - assert LicenseSymbol("CC0-1.0") in spdx_info.spdx_expressions + reuse_info = project.reuse_info_of("doc/fsfe.png") + assert LicenseSymbol("CC0-1.0") in reuse_info.spdx_expressions def test_license_file_detected(empty_directory): @@ -322,10 +322,10 @@ def test_license_file_detected(empty_directory): ) project = Project(empty_directory) - spdx_info = project.spdx_info_of("foo.py") + reuse_info = project.reuse_info_of("foo.py") - assert "SPDX-FileCopyrightText: 2017 Jane Doe" in spdx_info.copyright_lines - assert LicenseSymbol("MIT") in spdx_info.spdx_expressions + assert "SPDX-FileCopyrightText: 2017 Jane Doe" in reuse_info.copyright_lines + assert LicenseSymbol("MIT") in reuse_info.spdx_expressions def test_licenses_filename(empty_directory): From 7346234169b30705f75b2d69a0a34d69d81e2bc8 Mon Sep 17 00:00:00 2001 From: linozen Date: Thu, 1 Jun 2023 10:52:03 +0200 Subject: [PATCH 60/60] Add SourceType enum and use it for source_type in ReuseInfo This commit adds a new enum `SourceType` with three possible values to indicate if the source type is a `.license file`, `file header` or `DEP5 file`. It then updates the usage of `source_type` by replacing the string type with the new `SourceType` enum type. This improves readability and makes the code more maintainable. --- src/reuse/__init__.py | 18 +++++++++++++++++- src/reuse/project.py | 9 +++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py index 2da988173..a20317b5b 100644 --- a/src/reuse/__init__.py +++ b/src/reuse/__init__.py @@ -20,6 +20,7 @@ import os import re from dataclasses import dataclass, field +from enum import Enum, auto from typing import NamedTuple, Optional, Set try: @@ -88,6 +89,21 @@ _IGNORE_FILE_PATTERNS.extend(_IGNORE_SPDX_PATTERNS) +class SourceType(Enum): + """ + An enumeration representing the types of sources for license information. 
+ + Potential values: + DOT_LICENSE_FILE: A .license file containing license information. + FILE_HEADER: A file header containing license information. + DEP5_FILE: A .reuse/dep5 file containing license information. + """ + + DOT_LICENSE_FILE = ".license file" + FILE_HEADER = "file header" + DEP5_FILE = ".reuse/dep5 file" + + @dataclass(frozen=True) class ReuseInfo: """Simple dataclass holding licensing and copyright information""" @@ -96,7 +112,7 @@ class ReuseInfo: copyright_lines: Set[str] = field(default_factory=set) contributor_lines: Set[str] = field(default_factory=set) source_path: Optional[str] = None - source_type: Optional[str] = None + source_type: Optional[SourceType] = None def contains_copyright_or_licensing(self) -> bool: """Either *spdx_expressions* or *copyright_lines* is non-empty.""" diff --git a/src/reuse/project.py b/src/reuse/project.py index 7f6506863..8fb2c1b90 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -25,6 +25,7 @@ _IGNORE_MESON_PARENT_DIR_PATTERNS, IdentifierNotFound, ReuseInfo, + SourceType, ) from ._licenses import EXCEPTION_MAP, LICENSE_MAP from ._util import ( @@ -151,7 +152,7 @@ def reuse_info_of(self, path: PathLike) -> ReuseInfo: """ path = _determine_license_path(path) source_path = "" - source_type = "" + source_type = None _LOGGER.debug(f"searching '{path}' for SPDX information") @@ -189,9 +190,9 @@ def reuse_info_of(self, path: PathLike) -> ReuseInfo: if file_result: source_path = str(path) if path.suffix == ".license": - source_type = ".license file" + source_type = SourceType.DOT_LICENSE_FILE else: - source_type = "file header" + source_type = SourceType.FILE_HEADER except (ExpressionError, ParseError): _LOGGER.error( @@ -224,7 +225,7 @@ def reuse_info_of(self, path: PathLike) -> ReuseInfo: spdx_expressions=dep5_result.spdx_expressions, copyright_lines=dep5_result.copyright_lines, source_path=source_path, - source_type="DEP5 file", + source_type=SourceType.DEP5_FILE, ) # There is a file header or a 
.license file return ReuseInfo(