From b204baf4a4f5b39d8b6064c5297a71f32a494a44 Mon Sep 17 00:00:00 2001 From: Thomas Druez Date: Thu, 28 Jan 2021 19:01:38 +0400 Subject: [PATCH] Log messages from Pipeline execution on a new Run instance `log` field #66 Signed-off-by: Thomas Druez --- docs/scanpipe-command-line.rst | 4 + scanpipe/api/serializers.py | 1 + scanpipe/apps.py | 9 ++ scanpipe/logging.py | 101 ++++++++++++++++++ scanpipe/management/commands/status.py | 4 + ...2_run_run_id.py => 0002_run_id_and_log.py} | 5 + scanpipe/models.py | 14 ++- scanpipe/pipes/scancode.py | 2 +- scanpipe/tests/test_commands.py | 17 ++- scanpipe/tests/test_models.py | 12 +++ 10 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 scanpipe/logging.py rename scanpipe/migrations/{0002_run_run_id.py => 0002_run_id_and_log.py} (71%) diff --git a/docs/scanpipe-command-line.rst b/docs/scanpipe-command-line.rst index f4487e34b..bffba5f0d 100644 --- a/docs/scanpipe-command-line.rst +++ b/docs/scanpipe-command-line.rst @@ -109,6 +109,10 @@ List all the pipelines added of the project named ``PROJECT``. Display status information about the provided ``PROJECT``. +.. note:: + The full logs of each pipeline execution are displayed by default. + This can be disabled providing the ``--verbosity 0`` option. + `$ scanpipe output --project PROJECT --format {json,csv,xlsx}` -------------------------------------------------------------- diff --git a/scanpipe/api/serializers.py b/scanpipe/api/serializers.py index ba706ad98..7a098f8f5 100644 --- a/scanpipe/api/serializers.py +++ b/scanpipe/api/serializers.py @@ -73,6 +73,7 @@ class Meta: "task_end_date", "task_exitcode", "task_output", + "log", "execution_time", ] diff --git a/scanpipe/apps.py b/scanpipe/apps.py index 68ddf140c..46f82c96c 100644 --- a/scanpipe/apps.py +++ b/scanpipe/apps.py @@ -25,6 +25,9 @@ from django.apps import AppConfig from django.utils.translation import gettext_lazy as _ +from scanpipe.logging import RunLogger +from scanpipe.logging import extra_logging + class ScanPipeConfig(AppConfig): name = "scanpipe" @@ -45,6 +48,12 @@ def ready(self): name = child.name[: -len(dot_py_suffix)] self.pipelines.append((location, name)) + # Decorates the default metaflow logger to capture log messages + # Warning: This import cannot be moved outside this method + from metaflow import cli + + cli.logger = extra_logging(cli.logger, RunLogger()) + def is_valid(self, pipeline): """ Return True if the pipeline is valid and available. diff --git a/scanpipe/logging.py b/scanpipe/logging.py new file mode 100644 index 000000000..39473716c --- /dev/null +++ b/scanpipe/logging.py @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/nexB/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# ScanCode.io should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# +# ScanCode.io is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/nexB/scancode.io for support and download. + +import functools +import re +from contextlib import suppress +from datetime import datetime +from pathlib import Path + +from django.apps import apps +from django.core.exceptions import MultipleObjectsReturned +from django.core.exceptions import ObjectDoesNotExist +from django.utils.functional import cached_property + +from scancodeio import WORKSPACE_LOCATION + + +def extra_logging(func, extra_logger): + """ + Decorator to add logging customization the default Metaflow logger. + This is used as a workaround since Metaflow does not provide any API to customize + the logging of Flow/Pipeline execution. + """ + + @functools.wraps(func) + def wrapper_decorator(*args, **kwargs): + value = func(*args, **kwargs) + extra_logger.log(*args, **kwargs) + return value + + return wrapper_decorator + + +class RunLogger: + """ + Log messages from Pipeline execution on the Run instance `log` field. + If the `run_id` is not available in the message, the message is logged to the + `log_file`. + + One caveat to this approach is that the `run_id` is set on the Run instance during + the "start" step of a Pipeline, making the "Task is starting." message for that + initial step not logged in the `Run.log` field. + """ + + log_file = Path(WORKSPACE_LOCATION) / "scanpipe.log" + + @cached_property + def run_model(self): + return apps.get_model("scanpipe", "Run") + + @staticmethod + def get_run_id(head): + run_id_pattern = re.compile(r"\[(?P[0-9]{16})/") + match = run_id_pattern.search(head) + if match: + return match.group("run_id") + + def get_run(self, run_id): + if not run_id: + return + with suppress(ObjectDoesNotExist, MultipleObjectsReturned): + return self.run_model.objects.get(run_id=run_id) + + def log(self, body="", head="", **kwargs): + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + message = f"{timestamp} {head}{body}" + + run_id = self.get_run_id(head) + run = self.get_run(run_id) + if run: + self.log_to_run_instance(run, message) + else: + self.log_to_file(message) + + @staticmethod + def log_to_run_instance(run, message): + run.append_to_log(message) + run.save() + + def log_to_file(self, message): + with open(self.log_file, "a+") as f: + f.write(message + "\n") diff --git a/scanpipe/management/commands/status.py b/scanpipe/management/commands/status.py index 554f3c36f..187cb4022 100644 --- a/scanpipe/management/commands/status.py +++ b/scanpipe/management/commands/status.py @@ -52,12 +52,16 @@ def handle(self, *args, **options): status = status or "(no status)" message.append(f" - {status}: {count}") + display_runs_log = options["verbosity"] > 0 runs = self.project.runs.all() if runs: message.append("\nPipelines:") for run in runs: status_code = self.get_run_status_code(run) message.append(f" [{status_code}] {run.pipeline}") + if display_runs_log and run.log: + for line in run.log.split("\n"): + message.append(3 * " " + line) for line in message: self.stdout.write(line) diff --git a/scanpipe/migrations/0002_run_run_id.py b/scanpipe/migrations/0002_run_id_and_log.py similarity index 71% rename from scanpipe/migrations/0002_run_run_id.py rename to scanpipe/migrations/0002_run_id_and_log.py index 4c74a164c..f57d644fe 100644 --- a/scanpipe/migrations/0002_run_run_id.py +++ b/scanpipe/migrations/0002_run_id_and_log.py @@ -15,4 +15,9 @@ class Migration(migrations.Migration): name='run_id', field=models.CharField(blank=True, editable=False, max_length=16), ), + migrations.AddField( + model_name='run', + name='log', + field=models.TextField(blank=True, editable=False), + ), ] diff --git a/scanpipe/models.py b/scanpipe/models.py index 09506247e..1e86e5efe 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -20,7 +20,6 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. -import re import shutil import traceback import uuid @@ -468,6 +467,7 @@ class Run(UUIDPKModel, ProjectRelatedModel, AbstractTaskFieldsModel): created_date = models.DateTimeField(auto_now_add=True, db_index=True) description = models.TextField(blank=True) run_id = models.CharField(max_length=16, blank=True, editable=False) + log = models.TextField(blank=True, editable=False) objects = RunQuerySet.as_manager() @@ -490,6 +490,18 @@ def task_succeeded(self): """ return self.task_exitcode == 0 + def append_to_log(self, message, save=False): + """ + Append the `message` string to the `log` field of this Run instance. + """ + message = message.strip() + if any(lf in message for lf in ("\n", "\r")): + raise ValueError("message cannot contain line returns (either CR or LF).") + + self.log = self.log + message + "\n" + if save: + self.save() + def profile(self, print_results=False): """ Return computed execution times for each steps of this Run. diff --git a/scanpipe/pipes/scancode.py b/scanpipe/pipes/scancode.py index ba6bace90..839656acb 100644 --- a/scanpipe/pipes/scancode.py +++ b/scanpipe/pipes/scancode.py @@ -44,7 +44,7 @@ def run_extractcode(location, options=None, raise_on_error=False): """ Extract content at `location` with extractcode. Optional arguments for the `extractcode` executable can be provided with the - `options` list. + `options` list. If `raise_on_error` is enabled, a ScancodeError will be raised if the exitcode greater than 0. """ diff --git a/scanpipe/tests/test_commands.py b/scanpipe/tests/test_commands.py index 40e7a6c67..a3f5d7bbd 100644 --- a/scanpipe/tests/test_commands.py +++ b/scanpipe/tests/test_commands.py @@ -28,6 +28,7 @@ from django.core.management import CommandError from django.core.management import call_command from django.test import TestCase +from django.utils import timezone from scanpipe.management.commands.graph import is_graphviz_installed from scanpipe.models import Project @@ -274,7 +275,7 @@ def test_scanpipe_management_command_run_resume(self, mock_resume_pipeline_task) def test_scanpipe_management_command_status(self): project = Project.objects.create(name="my_project") - project.add_pipeline(self.pipeline_location) + run = project.add_pipeline(self.pipeline_location) options = ["--project", project.name, "--no-color"] out = StringIO() @@ -287,6 +288,20 @@ def test_scanpipe_management_command_status(self): self.assertIn("- ProjectError: 0", output) self.assertIn("[ ] scanpipe/pipelines/docker.py", output) + run.task_start_date = timezone.now() + run.log = ( + "[1611839665826870/start/1 (pid 65890)] Task finished successfully.\n" + "[1611839665826870/extract_images/2 (pid 65914)] Task is starting.\n" + ) + run.save() + out = StringIO() + call_command("status", *options, stdout=out) + + output = out.getvalue() + self.assertIn("[RUNNING] scanpipe/pipelines/docker.py", output) + for line in run.log.split("\n"): + self.assertIn(line, output) + def test_scanpipe_management_command_output(self): project = Project.objects.create(name="my_project") diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index ed60acadf..4821e6b6e 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -270,6 +270,18 @@ def test_scanpipe_run_model_queryset_methods(self): qs = self.project1.runs.failed() self.assertEqual([failed], list(qs)) + def test_scanpipe_run_model_append_to_log(self): + run1 = self.create_run() + + with self.assertRaises(ValueError): + run1.append_to_log("multiline\nmessage") + + run1.append_to_log("line1") + run1.append_to_log("line2", save=True) + + run1.refresh_from_db() + self.assertEqual("line1\nline2\n", run1.log) + def test_scanpipe_run_model_profile_method(self): run1 = self.create_run()