From aac49f8c1b9172b9f4746b824eb2f4c4199dcbe3 Mon Sep 17 00:00:00 2001
From: Sebastian Schuberth
Date: Wed, 23 Feb 2022 08:48:36 +0100
Subject: [PATCH] FindingsMatcher: Add a function to associate exceptions with
 licenses

E.g. ScanCode reports exceptions to licenses as individual license
findings. That is problematic as exceptions on their own are not valid
SPDX expressions, also see [1]. Introduce a new function that fixes up
findings by associating exceptions with the licenses they belong to.

[1]: https://github.com/nexB/scancode-toolkit/issues/2873

Signed-off-by: Sebastian Schuberth
---
 .../src/main/kotlin/utils/FindingsMatcher.kt | 62 +++++++++++++++++++
 .../test/kotlin/utils/FindingsMatcherTest.kt | 39 ++++++++++++
 2 files changed, 101 insertions(+)

diff --git a/model/src/main/kotlin/utils/FindingsMatcher.kt b/model/src/main/kotlin/utils/FindingsMatcher.kt
index b59dac2e5af9..3c1dd6cc39eb 100644
--- a/model/src/main/kotlin/utils/FindingsMatcher.kt
+++ b/model/src/main/kotlin/utils/FindingsMatcher.kt
@@ -27,6 +27,9 @@ import kotlin.math.min
 import org.ossreviewtoolkit.model.CopyrightFinding
 import org.ossreviewtoolkit.model.LicenseFinding
 import org.ossreviewtoolkit.model.TextLocation
+import org.ossreviewtoolkit.utils.spdx.SpdxConstants.NOASSERTION
+import org.ossreviewtoolkit.utils.spdx.SpdxLicenseException
+import org.ossreviewtoolkit.utils.spdx.toSpdx
 
 /**
  * A class for matching copyright findings to license findings. Copyright statements may be matched either to license
@@ -204,3 +207,62 @@ private fun MutableMap>.merge(
         getOrPut(licenseFinding) { mutableSetOf() } += copyrightFindings
     }
 }
+
+/**
+ * Process [findings] for stand-alone license exceptions and associate them with nearby (according to [toleranceLines])
+ * applicable licenses. Orphan license exceptions are associated with [NOASSERTION]. Return the list of resulting
+ * findings.
+ */
+fun associateLicensesWithExceptions(
+    findings: List<LicenseFinding>,
+    toleranceLines: Int = FindingsMatcher.DEFAULT_TOLERANCE_LINES
+): List<LicenseFinding> {
+    val (exceptions, licenses) = findings.partition { SpdxLicenseException.forId(it.license.toString()) != null }
+
+    val remainingExceptions = exceptions.toMutableList()
+    val fixedLicenses = licenses.toMutableList()
+
+    val i = remainingExceptions.iterator()
+
+    while (i.hasNext()) {
+        val exception = i.next()
+
+        // Determine the IDs of all licenses this exception is applicable to.
+        val applicableLicenses = SpdxLicenseException.mapping[exception.license.toString()].orEmpty().map { it.id }
+
+        // Only consider findings of applicable licenses that are located in the same path as the exception.
+        val applicableLicenseFindings = licenses.filter {
+            it.location.path == exception.location.path && it.license.toString() in applicableLicenses
+        }
+
+        // Pick the closest applicable license finding, but only if it is within the line tolerance.
+        val associatedLicenseFinding = applicableLicenseFindings
+            .map { it to it.location.distanceTo(exception.location) }
+            .sortedBy { it.second }
+            .firstOrNull { it.second <= toleranceLines }
+            ?.first
+
+        if (associatedLicenseFinding != null) {
+            // Add a combined "license WITH exception" finding whose location spans both original findings.
+            fixedLicenses += associatedLicenseFinding.copy(
+                license = "${associatedLicenseFinding.license} WITH ${exception.license}".toSpdx(),
+                location = associatedLicenseFinding.location.copy(
+                    startLine = min(associatedLicenseFinding.location.startLine, exception.location.startLine),
+                    endLine = max(associatedLicenseFinding.location.endLine, exception.location.endLine)
+                )
+            )
+
+            // Drop the plain license from the result and mark the stand-alone exception as handled.
+            fixedLicenses.remove(associatedLicenseFinding)
+            i.remove()
+        }
+    }
+
+    // Associate remaining "orphan" exceptions with "NOASSERTION" to turn them into valid SPDX expressions and, at the
+    // same time, mark them for review as "NOASSERTION" is not a real license.
+    remainingExceptions.mapTo(fixedLicenses) { exception ->
+        exception.copy(license = "$NOASSERTION WITH ${exception.license}".toSpdx())
+    }
+
+    return fixedLicenses
+}
diff --git a/model/src/test/kotlin/utils/FindingsMatcherTest.kt b/model/src/test/kotlin/utils/FindingsMatcherTest.kt
index af2ae982a72e..9ac19dc01b91 100644
--- a/model/src/test/kotlin/utils/FindingsMatcherTest.kt
+++ b/model/src/test/kotlin/utils/FindingsMatcherTest.kt
@@ -223,5 +223,44 @@ class FindingsMatcherTest : WordSpec() {
                 result.getCopyrights("root-license-1").map { it.statement } should containExactly("statement 1")
             }
         }
+
+        "associateLicensesWithExceptions()" should {
+            "merge with the nearest license" {
+                associateLicensesWithExceptions(
+                    listOf(
+                        LicenseFinding("Apache-2.0", TextLocation("file", 1)),
+                        LicenseFinding("Apache-2.0", TextLocation("file", 100)),
+                        LicenseFinding("LLVM-exception", TextLocation("file", 5))
+                    )
+                ) should containExactlyInAnyOrder(
+                    LicenseFinding("Apache-2.0 WITH LLVM-exception", TextLocation("file", 1, 5)),
+                    LicenseFinding("Apache-2.0", TextLocation("file", 100))
+                )
+            }
+
+            "associate orphan exceptions by NOASSERTION" {
+                associateLicensesWithExceptions(
+                    listOf(
+                        LicenseFinding("GPL-2.0-only", TextLocation("file", 1)),
+                        LicenseFinding("389-exception", TextLocation("file", 100))
+                    )
+                ) should containExactlyInAnyOrder(
+                    LicenseFinding("GPL-2.0-only", TextLocation("file", 1)),
+                    LicenseFinding("NOASSERTION WITH 389-exception", TextLocation("file", 100))
+                )
+            }
+
+            "not associate findings from different files" {
+                associateLicensesWithExceptions(
+                    listOf(
+                        LicenseFinding("Apache-2.0", TextLocation("fileA", 1)),
+                        LicenseFinding("LLVM-exception", TextLocation("fileB", 5))
+                    )
+                ) should containExactlyInAnyOrder(
+                    LicenseFinding("Apache-2.0", TextLocation("fileA", 1)),
+                    LicenseFinding("NOASSERTION WITH LLVM-exception", TextLocation("fileB", 5))
+                )
+            }
+        }
     }
 }