Skip to content

Commit

Permalink
Do not store the root prefix when dealing with VirtualCodebase #147
Browse files Browse the repository at this point in the history
Signed-off-by: Thomas Druez <tdruez@nexb.com>
  • Loading branch information
tdruez committed Apr 28, 2021
1 parent 9bd38ee commit 4d46a38
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 27 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
// Release notes
// -------------

### unreleased

- Fix an issue accessing codebase resource content when using the scan_codebase
and load_inventory pipelines.
https://github.com/nexB/scancode.io/issues/147

### v21.4.28

- The installation local timezone can be configured using the TIME_ZONE setting.
Expand Down
1 change: 1 addition & 0 deletions etc/requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ container_inspector>=3.1.2

# ScanCode-toolkit
scancode-toolkit[packages]==21.3.31
commoncode>=21.4.28

# Utilities
XlsxWriter==1.3.8
Expand Down
21 changes: 12 additions & 9 deletions scanpipe/pipes/scancode.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,7 @@ def get_virtual_codebase(project, input_location):
temp_path = project.tmp_path / "scancode-temp-resource-cache"
temp_path.mkdir(parents=True, exist_ok=True)

return VirtualCodebase(
location=input_location, temp_dir=str(temp_path), max_in_memory=0
)
return VirtualCodebase(input_location, temp_dir=str(temp_path), max_in_memory=0)


def create_codebase_resources(project, scanned_codebase):
Expand All @@ -325,21 +323,25 @@ def create_codebase_resources(project, scanned_codebase):
CodebaseResource objects as the existing objects (based on the `path`) will be
skipped.
"""
for scanned_resource in scanned_codebase.walk():
for scanned_resource in scanned_codebase.walk(skip_root=True):
resource_data = {}

for field in CodebaseResource._meta.fields:
# Do not include the path as provided by the scanned_resource since it
# includes the "root". The `get_path` method is used instead.
if field.name == "path":
continue
value = getattr(scanned_resource, field.name, None)
if value is not None:
resource_data[field.name] = value

resource_type = "FILE" if scanned_resource.is_file else "DIRECTORY"
resource_data["type"] = CodebaseResource.Type[resource_type]
resource_path = scanned_resource.get_path(strip_root=True)

path = resource_data.pop("path")
CodebaseResource.objects.get_or_create(
project=project,
path=path,
path=resource_path,
defaults=resource_data,
)

Expand All @@ -350,12 +352,13 @@ def create_discovered_packages(project, scanned_codebase):
object to the DB as DiscoveredPackage of `project`.
Relate package resources to CodebaseResource.
"""
for scanned_resource in scanned_codebase.walk():
for scanned_resource in scanned_codebase.walk(skip_root=True):
scanned_packages = getattr(scanned_resource, "packages", [])
if not scanned_packages:
continue

cbr = CodebaseResource.objects.get(project=project, path=scanned_resource.path)
scanned_resource_path = scanned_resource.get_path(strip_root=True)
cbr = CodebaseResource.objects.get(project=project, path=scanned_resource_path)

for scan_data in scanned_packages:
discovered_package = pipes.update_or_create_package(project, scan_data)
Expand All @@ -370,7 +373,7 @@ def create_discovered_packages(project, scanned_codebase):
)
for scanned_package_res in scanned_package_resources:
package_cbr = CodebaseResource.objects.get(
project=project, path=scanned_package_res.path
project=project, path=scanned_package_res.get_path(strip_root=True)
)
set_codebase_resource_for_package(
codebase_resource=package_cbr, discovered_package=discovered_package
Expand Down
43 changes: 27 additions & 16 deletions scanpipe/tests/data/asgiref-3.3.0_scan.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,25 @@
"headers": [
{
"tool_name": "scancode-toolkit",
"tool_version": "3.2.3",
"tool_version": "21.3.31",
"options": {
"input": [
"codebase"
],
"--copyright": true,
"--email": true,
"--info": true,
"--json-pp": "output/scancode-2020-11-25-14-57-06.json",
"--json-pp": "output/scancode-2021-04-28-15-27-38.json",
"--license": true,
"--license-text": true,
"--package": true,
"--processes": "4",
"--timeout": "60.0",
"--url": true
},
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
"start_timestamp": "2020-11-25T145707.862006",
"end_timestamp": "2020-11-25T145716.181861",
"duration": 8.31989312171936,
"start_timestamp": "2021-04-28T152739.609244",
"end_timestamp": "2021-04-28T152743.569759",
"duration": 3.9605350494384766,
"message": null,
"errors": [],
"extra_data": {
Expand Down Expand Up @@ -71,7 +70,7 @@
"base_name": "asgiref-3.3.0-py3-none-any",
"extension": ".whl",
"size": 19948,
"date": "2020-11-25",
"date": "2021-04-28",
"sha1": "c03f67211a311b13d1294ac8af7cb139ee34c4f9",
"md5": "5bce1df6dedc53a41a9a6b40d7b1699e",
"sha256": "a5098bc870b80e7b872bff60bb363c7f2c2c89078759f6c47b53ff8c525a152e",
Expand Down Expand Up @@ -497,7 +496,9 @@
"owner": "Apache Software Foundation",
"homepage_url": "http://www.apache.org/licenses/",
"text_url": "http://www.apache.org/licenses/LICENSE-2.0",
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:apache-2.0",
"reference_url": "https://scancode-licensedb.aboutcode.org/apache-2.0",
"scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/apache-2.0.LICENSE",
"scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/apache-2.0.yml",
"spdx_license_key": "Apache-2.0",
"spdx_url": "https://spdx.org/licenses/Apache-2.0",
"start_line": 2,
Expand All @@ -512,11 +513,12 @@
"is_license_notice": true,
"is_license_reference": false,
"is_license_tag": false,
"is_license_intro": false,
"matcher": "2-aho",
"rule_length": 6,
"matched_length": 6,
"match_coverage": 100.0,
"rule_relevance": 100.0
"rule_relevance": 100
},
"matched_text": "# under the Apache 2.0 license. You may see the original project at"
}
Expand Down Expand Up @@ -651,7 +653,9 @@
"owner": "Regents of the University of California",
"homepage_url": "http://www.opensource.org/licenses/BSD-3-Clause",
"text_url": "http://www.opensource.org/licenses/BSD-3-Clause",
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:bsd-new",
"reference_url": "https://scancode-licensedb.aboutcode.org/bsd-new",
"scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.LICENSE",
"scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.yml",
"spdx_license_key": "BSD-3-Clause",
"spdx_url": "https://spdx.org/licenses/BSD-3-Clause",
"start_line": 4,
Expand All @@ -666,11 +670,12 @@
"is_license_notice": false,
"is_license_reference": false,
"is_license_tag": false,
"is_license_intro": false,
"matcher": "2-aho",
"rule_length": 214,
"matched_length": 214,
"match_coverage": 100.0,
"rule_relevance": 100.0
"rule_relevance": 100
},
"matched_text": "Redistribution and use in source and binary forms, with or without modification,\nare permitted provided that the following conditions are met:\n\n 1. Redistributions of source code must retain the above copyright notice,\n this list of conditions and the following disclaimer.\n\n 2. Redistributions in binary form must reproduce the above copyright\n notice, this list of conditions and the following disclaimer in the\n documentation and/or other materials provided with the distribution.\n\n 3. Neither the name of Django nor the names of its contributors may be used\n to endorse or promote products derived from this software without\n specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\nANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\nANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\nLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\nSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
}
Expand Down Expand Up @@ -733,7 +738,9 @@
"owner": "Regents of the University of California",
"homepage_url": "http://www.opensource.org/licenses/BSD-3-Clause",
"text_url": "http://www.opensource.org/licenses/BSD-3-Clause",
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:bsd-new",
"reference_url": "https://scancode-licensedb.aboutcode.org/bsd-new",
"scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.LICENSE",
"scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.yml",
"spdx_license_key": "BSD-3-Clause",
"spdx_url": "https://spdx.org/licenses/BSD-3-Clause",
"start_line": 8,
Expand All @@ -748,11 +755,12 @@
"is_license_notice": false,
"is_license_reference": true,
"is_license_tag": false,
"is_license_intro": false,
"matcher": "2-aho",
"rule_length": 2,
"matched_length": 2,
"match_coverage": 100.0,
"rule_relevance": 95.0
"rule_relevance": 95
},
"matched_text": "License: BSD"
},
Expand All @@ -766,7 +774,9 @@
"owner": "Regents of the University of California",
"homepage_url": "http://www.opensource.org/licenses/BSD-3-Clause",
"text_url": "http://www.opensource.org/licenses/BSD-3-Clause",
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:bsd-new",
"reference_url": "https://scancode-licensedb.aboutcode.org/bsd-new",
"scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.LICENSE",
"scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-new.yml",
"spdx_license_key": "BSD-3-Clause",
"spdx_url": "https://spdx.org/licenses/BSD-3-Clause",
"start_line": 16,
Expand All @@ -781,11 +791,12 @@
"is_license_notice": false,
"is_license_reference": false,
"is_license_tag": true,
"is_license_intro": false,
"matcher": "2-aho",
"rule_length": 5,
"matched_length": 5,
"match_coverage": 100.0,
"rule_relevance": 99.0
"rule_relevance": 99
},
"matched_text": "Classifier: License :: OSI Approved :: BSD License"
}
Expand Down Expand Up @@ -997,4 +1008,4 @@
"scan_errors": []
}
]
}
}
16 changes: 14 additions & 2 deletions scanpipe/tests/test_pipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,13 +411,25 @@ def test_scanpipe_pipes_scancode_virtual_codebase(self):
scancode.create_codebase_resources(project, virtual_codebase)
scancode.create_discovered_packages(project, virtual_codebase)

self.assertEqual(19, CodebaseResource.objects.count())
self.assertEqual(18, CodebaseResource.objects.count())
self.assertEqual(1, DiscoveredPackage.objects.count())
# Make sure the root is not created as a CodebaseResource, walk(skip_root=True)
self.assertFalse(CodebaseResource.objects.filter(path="codebase").exists())

# Make sure the root is properly stripped, see `.get_path(strip_root=True)`
self.assertFalse(
CodebaseResource.objects.filter(path__startswith="codebase").exists()
)

# Make sure the detected package is properly assigned to its codebase resource
package = DiscoveredPackage.objects.get()
expected = "asgiref-3.3.0-py3-none-any.whl"
self.assertEqual(expected, package.codebase_resources.get().path)

# The functions can be called again and existing objects are skipped
scancode.create_codebase_resources(project, virtual_codebase)
scancode.create_discovered_packages(project, virtual_codebase)
self.assertEqual(19, CodebaseResource.objects.count())
self.assertEqual(18, CodebaseResource.objects.count())
self.assertEqual(1, DiscoveredPackage.objects.count())

def test_scanpipe_pipes_scancode_create_codebase_resources_inject_policy(self):
Expand Down

0 comments on commit 4d46a38

Please sign in to comment.