Skip to content

Commit

Permalink
Move JSONResultsGenerator to the pipes.outputs module #45
Browse files Browse the repository at this point in the history
Signed-off-by: Thomas Druez <tdruez@nexb.com>
  • Loading branch information
tdruez committed Nov 16, 2020
1 parent 2d27295 commit d282c00
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 112 deletions.
10 changes: 5 additions & 5 deletions scanpipe/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@
from scanpipe.models import Project
from scanpipe.models import ProjectError
from scanpipe.models import Run
from scanpipe.outputs import ResultsGenerator
from scanpipe.pipelines import get_pipeline_description
from scanpipe.pipes.outputs import JSONResultsGenerator

scanpipe_app_config = apps.get_app_config("scanpipe")

Expand Down Expand Up @@ -75,10 +75,10 @@ def results(self, request, *args, **kwargs):
"""
Return the results compatible with ScanCode data format.
The content is returned as a stream of JSON content using the
ResultsGenerator.
JSONResultsGenerator class.
"""
project = self.get_object()
results_generator = ResultsGenerator(project)
results_generator = JSONResultsGenerator(project)
return StreamingHttpResponse(
streaming_content=results_generator, content_type="application/json"
)
Expand All @@ -89,10 +89,10 @@ def results(self, request, *args, **kwargs):
def results_download(self, request, *args, **kwargs):
"""
Return the results as an attachment.
The content is streamed using the ResultsGenerator.
The content is streamed using the JSONResultsGenerator.
"""
project = self.get_object()
results_generator = ResultsGenerator(project)
results_generator = JSONResultsGenerator(project)

response = FileResponse(
streaming_content=results_generator,
Expand Down
4 changes: 2 additions & 2 deletions scanpipe/management/commands/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.management.commands import ProjectCommand
from scanpipe.outputs import ResultsGenerator
from scanpipe.pipes.outputs import JSONResultsGenerator


class Command(ProjectCommand):
Expand All @@ -38,7 +38,7 @@ def add_arguments(self, parser):
def handle(self, *args, **options):
super().handle(*args, **options)

results_generator = ResultsGenerator(self.project)
results_generator = JSONResultsGenerator(self.project)
output_file = options["output_file"]

stream = open(output_file, "w") if output_file else self.stdout
Expand Down
103 changes: 0 additions & 103 deletions scanpipe/outputs.py

This file was deleted.

78 changes: 78 additions & 0 deletions scanpipe/pipes/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,15 @@
# Visit https://github.com/nexB/scancode.io for support and download.

import csv
import json

from django.core.serializers.json import DjangoJSONEncoder

from scancodeio import SCAN_NOTICE
from scancodeio import __version__ as scancodeio_version
from scanpipe.api.serializers import CodebaseResourceSerializer
from scanpipe.api.serializers import DiscoveredPackageSerializer
from scanpipe.api.serializers import RunSerializer


def queryset_to_csv(project, queryset, fieldnames):
Expand Down Expand Up @@ -61,3 +67,75 @@ def to_csv(project):
for queryset, serializer in data_sources:
fieldnames = list(serializer().get_fields().keys())
queryset_to_csv(project, queryset, fieldnames)


class JSONResultsGenerator:
    """
    Iterate over the JSON results of a `project`, one text chunk at a time.

    Streaming the chunks lets the results flow from the database to the
    client without first building the whole document in memory.

    The Django Serializer class can write to a stream, but that stream cannot
    be handed directly to a StreamingHttpResponse: the results would have to
    be written to a file first and then iterated by the response, which does
    not work well in an HTTP request context since the request can time out
    while the file is being generated.
    """

    def __init__(self, project):
        self.project = project

    def __iter__(self):
        # The document is a single JSON object holding three arrays, emitted
        # in this fixed order; only the last array omits the trailing comma.
        yield "{\n"
        yield from self.serialize(label="headers", generator=self.get_headers)
        yield from self.serialize(label="packages", generator=self.get_packages)
        yield from self.serialize(label="files", generator=self.get_files, latest=True)
        yield "}"

    def serialize(self, label, generator, latest=False):
        """
        Yield the chunks of one `"label": [...]` JSON array.

        `generator` is called with the project and must yield already encoded
        JSON strings. Set `latest` for the final section of the document so
        that no comma follows the closing bracket.
        """
        yield f'"{label}": [\n'

        separator = ",\n"
        for position, chunk in enumerate(generator(self.project)):
            # Every entry but the first is preceded by the separator.
            yield chunk if position == 0 else separator + chunk

        if latest:
            yield "]\n"
        else:
            yield "],\n"

    @staticmethod
    def encode(data):
        """Return `data` dumped as an indented JSON string."""
        return json.dumps(data, indent=2, cls=DjangoJSONEncoder)

    def get_headers(self, project):
        """Yield the single encoded "headers" entry for the `project`."""
        run_serializer = RunSerializer(
            project.runs.all(),
            many=True,
            exclude_fields=("url", "project"),
        )

        yield self.encode(
            {
                "tool_name": "scanpipe",
                "tool_version": scancodeio_version,
                "notice": SCAN_NOTICE,
                "uuid": project.uuid,
                "created_date": project.created_date,
                "input_files": project.input_files,
                "runs": run_serializer.data,
                "extra_data": project.extra_data,
            }
        )

    def get_packages(self, project):
        """Yield one encoded entry per discovered package of the `project`."""
        for package in project.discoveredpackages.all().iterator():
            yield self.encode(DiscoveredPackageSerializer(package).data)

    def get_files(self, project):
        """Yield one encoded entry per non-symlink resource of the `project`."""
        queryset = project.codebaseresources.without_symlinks().prefetch_related(
            "discovered_packages"
        )
        for resource in queryset.iterator():
            yield self.encode(CodebaseResourceSerializer(resource).data)
4 changes: 2 additions & 2 deletions scanpipe/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@
from rest_framework.exceptions import ErrorDetail
from rest_framework.test import APIClient

from scanpipe.api.views import ResultsGenerator
from scanpipe.models import CodebaseResource
from scanpipe.models import DiscoveredPackage
from scanpipe.models import Project
from scanpipe.models import ProjectError
from scanpipe.pipes.outputs import JSONResultsGenerator
from scanpipe.tests import package_data1


Expand Down Expand Up @@ -152,7 +152,7 @@ def test_scanpipe_api_project_create(self, mock_run_pipeline_task):
self.assertEqual(["upload_file"], response.data["input_root"])

def test_scanpipe_api_project_results_generator(self):
results_generator = ResultsGenerator(self.project1)
results_generator = JSONResultsGenerator(self.project1)
results = json.loads("".join(results_generator))

expected = ["files", "headers", "packages"]
Expand Down

0 comments on commit d282c00

Please sign in to comment.