From 4d46a386efcebb43d3413b8142353d33d37ac67d Mon Sep 17 00:00:00 2001 From: Thomas Druez Date: Wed, 28 Apr 2021 18:03:35 +0200 Subject: [PATCH] Do not store the root prefix when dealing with VirtualCodebase #147 Signed-off-by: Thomas Druez --- CHANGELOG.rst | 6 +++ etc/requirements/base.txt | 1 + scanpipe/pipes/scancode.py | 21 +++++----- scanpipe/tests/data/asgiref-3.3.0_scan.json | 43 +++++++++++++-------- scanpipe/tests/test_pipes.py | 16 +++++++- 5 files changed, 60 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7c75e9b52..24ebd99de 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,12 @@ // Release notes // ------------- +### unreleased + +- Fix an issue accessing codebase resource content using the scan_codebase and + load_inventory pipelines. + https://github.com/nexB/scancode.io/issues/147 + ### v21.4.28 - The installation local timezone can be configured using the TIME_ZONE setting. diff --git a/etc/requirements/base.txt b/etc/requirements/base.txt index fa83c625c..f6879ee0e 100644 --- a/etc/requirements/base.txt +++ b/etc/requirements/base.txt @@ -28,6 +28,7 @@ container_inspector>=3.1.2 # ScanCode-toolkit scancode-toolkit[packages]==21.3.31 +commoncode>=21.4.28 # Utilities XlsxWriter==1.3.8 diff --git a/scanpipe/pipes/scancode.py b/scanpipe/pipes/scancode.py index 977a2b02d..c3dafac40 100644 --- a/scanpipe/pipes/scancode.py +++ b/scanpipe/pipes/scancode.py @@ -312,9 +312,7 @@ def get_virtual_codebase(project, input_location): temp_path = project.tmp_path / "scancode-temp-resource-cache" temp_path.mkdir(parents=True, exist_ok=True) - return VirtualCodebase( - location=input_location, temp_dir=str(temp_path), max_in_memory=0 - ) + return VirtualCodebase(input_location, temp_dir=str(temp_path), max_in_memory=0) def create_codebase_resources(project, scanned_codebase): @@ -325,21 +323,25 @@ def create_codebase_resources(project, scanned_codebase): CodebaseResource objects as the existing objects (based on the `path`) 
will be skipped. """ - for scanned_resource in scanned_codebase.walk(): + for scanned_resource in scanned_codebase.walk(skip_root=True): resource_data = {} for field in CodebaseResource._meta.fields: + # Do not include the path as provided by the scanned_resource since it + # includes the "root". The `get_path` method is used instead. + if field.name == "path": + continue value = getattr(scanned_resource, field.name, None) if value is not None: resource_data[field.name] = value resource_type = "FILE" if scanned_resource.is_file else "DIRECTORY" resource_data["type"] = CodebaseResource.Type[resource_type] + resource_path = scanned_resource.get_path(strip_root=True) - path = resource_data.pop("path") CodebaseResource.objects.get_or_create( project=project, - path=path, + path=resource_path, defaults=resource_data, ) @@ -350,12 +352,13 @@ def create_discovered_packages(project, scanned_codebase): object to the DB as DiscoveredPackage of `project`. Relate package resources to CodebaseResource. 
""" - for scanned_resource in scanned_codebase.walk(): + for scanned_resource in scanned_codebase.walk(skip_root=True): scanned_packages = getattr(scanned_resource, "packages", []) if not scanned_packages: continue - cbr = CodebaseResource.objects.get(project=project, path=scanned_resource.path) + scanned_resource_path = scanned_resource.get_path(strip_root=True) + cbr = CodebaseResource.objects.get(project=project, path=scanned_resource_path) for scan_data in scanned_packages: discovered_package = pipes.update_or_create_package(project, scan_data) @@ -370,7 +373,7 @@ def create_discovered_packages(project, scanned_codebase): ) for scanned_package_res in scanned_package_resources: package_cbr = CodebaseResource.objects.get( - project=project, path=scanned_package_res.path + project=project, path=scanned_package_res.get_path(strip_root=True) ) set_codebase_resource_for_package( codebase_resource=package_cbr, discovered_package=discovered_package diff --git a/scanpipe/tests/data/asgiref-3.3.0_scan.json b/scanpipe/tests/data/asgiref-3.3.0_scan.json index 505305118..adc0e940c 100644 --- a/scanpipe/tests/data/asgiref-3.3.0_scan.json +++ b/scanpipe/tests/data/asgiref-3.3.0_scan.json @@ -2,7 +2,7 @@ "headers": [ { "tool_name": "scancode-toolkit", - "tool_version": "3.2.3", + "tool_version": "21.3.31", "options": { "input": [ "codebase" @@ -10,18 +10,17 @@ "--copyright": true, "--email": true, "--info": true, - "--json-pp": "output/scancode-2020-11-25-14-57-06.json", + "--json-pp": "output/scancode-2021-04-28-15-27-38.json", "--license": true, "--license-text": true, "--package": true, "--processes": "4", - "--timeout": "60.0", "--url": true }, "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. 
and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", - "start_timestamp": "2020-11-25T145707.862006", - "end_timestamp": "2020-11-25T145716.181861", - "duration": 8.31989312171936, + "start_timestamp": "2021-04-28T152739.609244", + "end_timestamp": "2021-04-28T152743.569759", + "duration": 3.9605350494384766, "message": null, "errors": [], "extra_data": { @@ -71,7 +70,7 @@ "base_name": "asgiref-3.3.0-py3-none-any", "extension": ".whl", "size": 19948, - "date": "2020-11-25", + "date": "2021-04-28", "sha1": "c03f67211a311b13d1294ac8af7cb139ee34c4f9", "md5": "5bce1df6dedc53a41a9a6b40d7b1699e", "sha256": "a5098bc870b80e7b872bff60bb363c7f2c2c89078759f6c47b53ff8c525a152e", @@ -497,7 +496,9 @@ "owner": "Apache Software Foundation", "homepage_url": "http://www.apache.org/licenses/", "text_url": "http://www.apache.org/licenses/LICENSE-2.0", - "reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:apache-2.0", + "reference_url": "https://scancode-licensedb.aboutcode.org/apache-2.0", + "scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/apache-2.0.LICENSE", + "scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/apache-2.0.yml", "spdx_license_key": "Apache-2.0", "spdx_url": "https://spdx.org/licenses/Apache-2.0", "start_line": 2, @@ -512,11 +513,12 @@ "is_license_notice": true, "is_license_reference": false, "is_license_tag": false, + "is_license_intro": false, "matcher": "2-aho", "rule_length": 6, "matched_length": 6, "match_coverage": 100.0, - "rule_relevance": 100.0 + "rule_relevance": 100 }, "matched_text": "# under the Apache 2.0 license. 
You may see the original project at" } @@ -651,7 +653,9 @@ "owner": "Regents of the University of California", "homepage_url": "http://www.opensource.org/licenses/BSD-3-Clause", "text_url": "http://www.opensource.org/licenses/BSD-3-Clause", - "reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:bsd-new", + "reference_url": "https://scancode-licensedb.aboutcode.org/bsd-new", + "scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.LICENSE", + "scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.yml", "spdx_license_key": "BSD-3-Clause", "spdx_url": "https://spdx.org/licenses/BSD-3-Clause", "start_line": 4, @@ -666,11 +670,12 @@ "is_license_notice": false, "is_license_reference": false, "is_license_tag": false, + "is_license_intro": false, "matcher": "2-aho", "rule_length": 214, "matched_length": 214, "match_coverage": 100.0, - "rule_relevance": 100.0 + "rule_relevance": 100 }, "matched_text": "Redistribution and use in source and binary forms, with or without modification,\nare permitted provided that the following conditions are met:\n\n 1. Redistributions of source code must retain the above copyright notice,\n this list of conditions and the following disclaimer.\n\n 2. Redistributions in binary form must reproduce the above copyright\n notice, this list of conditions and the following disclaimer in the\n documentation and/or other materials provided with the distribution.\n\n 3. Neither the name of Django nor the names of its contributors may be used\n to endorse or promote products derived from this software without\n specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\nANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\nANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\nLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\nSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." } @@ -733,7 +738,9 @@ "owner": "Regents of the University of California", "homepage_url": "http://www.opensource.org/licenses/BSD-3-Clause", "text_url": "http://www.opensource.org/licenses/BSD-3-Clause", - "reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:bsd-new", + "reference_url": "https://scancode-licensedb.aboutcode.org/bsd-new", + "scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.LICENSE", + "scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.yml", "spdx_license_key": "BSD-3-Clause", "spdx_url": "https://spdx.org/licenses/BSD-3-Clause", "start_line": 8, @@ -748,11 +755,12 @@ "is_license_notice": false, "is_license_reference": true, "is_license_tag": false, + "is_license_intro": false, "matcher": "2-aho", "rule_length": 2, "matched_length": 2, "match_coverage": 100.0, - "rule_relevance": 95.0 + "rule_relevance": 95 }, "matched_text": "License: BSD" }, @@ -766,7 +774,9 @@ "owner": "Regents of the University of California", "homepage_url": "http://www.opensource.org/licenses/BSD-3-Clause", "text_url": "http://www.opensource.org/licenses/BSD-3-Clause", - "reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:bsd-new", + "reference_url": "https://scancode-licensedb.aboutcode.org/bsd-new", + "scancode_text_url": 
"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.LICENSE", + "scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.yml", "spdx_license_key": "BSD-3-Clause", "spdx_url": "https://spdx.org/licenses/BSD-3-Clause", "start_line": 16, @@ -781,11 +791,12 @@ "is_license_notice": false, "is_license_reference": false, "is_license_tag": true, + "is_license_intro": false, "matcher": "2-aho", "rule_length": 5, "matched_length": 5, "match_coverage": 100.0, - "rule_relevance": 99.0 + "rule_relevance": 99 }, "matched_text": "Classifier: License :: OSI Approved :: BSD License" } @@ -997,4 +1008,4 @@ "scan_errors": [] } ] -} +} \ No newline at end of file diff --git a/scanpipe/tests/test_pipes.py b/scanpipe/tests/test_pipes.py index 0dc1a6ee5..58b53419b 100644 --- a/scanpipe/tests/test_pipes.py +++ b/scanpipe/tests/test_pipes.py @@ -411,13 +411,25 @@ def test_scanpipe_pipes_scancode_virtual_codebase(self): scancode.create_codebase_resources(project, virtual_codebase) scancode.create_discovered_packages(project, virtual_codebase) - self.assertEqual(19, CodebaseResource.objects.count()) + self.assertEqual(18, CodebaseResource.objects.count()) self.assertEqual(1, DiscoveredPackage.objects.count()) + # Make sure the root is not created as a CodebaseResource, walk(skip_root=True) + self.assertFalse(CodebaseResource.objects.filter(path="codebase").exists()) + + # Make sure the root is properly stripped, see `.get_path(strip_root=True)` + self.assertFalse( + CodebaseResource.objects.filter(path__startswith="codebase").exists() + ) + + # Make sure the detected package is properly assigned to its codebase resource + package = DiscoveredPackage.objects.get() + expected = "asgiref-3.3.0-py3-none-any.whl" + self.assertEqual(expected, package.codebase_resources.get().path) # The functions can be called again and existing objects are skipped scancode.create_codebase_resources(project, 
virtual_codebase) scancode.create_discovered_packages(project, virtual_codebase) - self.assertEqual(19, CodebaseResource.objects.count()) + self.assertEqual(18, CodebaseResource.objects.count()) self.assertEqual(1, DiscoveredPackage.objects.count()) def test_scanpipe_pipes_scancode_create_codebase_resources_inject_policy(self):