From bde05131587916c89286d424c3bbfb799e880489 Mon Sep 17 00:00:00 2001 From: tdruez <489057+tdruez@users.noreply.github.com> Date: Fri, 22 Jan 2021 13:38:42 +0400 Subject: [PATCH] Add a profile() method on the Run model for profiling pipeline execution #73 Signed-off-by: Thomas Druez --- CHANGELOG.rst | 3 ++ scanpipe/models.py | 39 +++++++++++++++++++++++ scanpipe/tests/test_models.py | 58 ++++++++++++++++++++++++++++++++++- 3 files changed, 99 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ffb38bf88..b2dd3e2f8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,9 @@ - Add support for scancode pipes and Project name with whitespaces +- Add a profile() method on the Run model for profiling pipeline execution + https://github.com/nexB/scancode.io/issues/73 + ### v1.0.6 (2020-12-23) - Add a management command to delete a Project and its related work directories diff --git a/scanpipe/models.py b/scanpipe/models.py index 4b9dbffd8..5624b448a 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -25,6 +25,7 @@ import traceback import uuid from contextlib import suppress +from datetime import datetime from pathlib import Path from django.core import checks @@ -498,6 +499,44 @@ def get_run_id(self): if match: return match.group("run_id") + def profile(self, print_results=False): + """ + Return computed execution times for each steps of this Run. + + If `print_results` is provided, the results are printed to stdout. + """ + if not self.task_succeeded: + return + + profiler = {} + for line in self.task_output.split("\n"): + if not line.endswith(("starting.", "successfully.")): + continue + + segments = line.split() + line_date_str = " ".join(segments[0:2]) + line_date = datetime.strptime(line_date_str, "%Y-%m-%d %H:%M:%S.%f") + step = segments[2].split("/")[1] + + if line.endswith("starting."): + profiler[step] = line_date + elif line.endswith("successfully."): + start_date = profiler[step] + profiler[step] = (line_date - start_date).seconds + + if not print_results: + return profiler + + total_run_time = sum(profiler.values()) + padding = max(len(name) for name in profiler.keys()) + 1 + for step, step_execution_time in profiler.items(): + percent = round(step_execution_time * 100 / total_run_time, 1) + output_str = f"{step:{padding}} {step_execution_time:>3} seconds {percent}%" + if percent > 50: + print("\033[41;37m" + output_str + "\033[m") + else: + print(output_str) + class CodebaseResourceQuerySet(ProjectRelatedQuerySet): def status(self, status=None): diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index 743861562..6d286a31e 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -19,9 +19,10 @@ # # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. - +import io import tempfile import uuid +from contextlib import redirect_stdout from datetime import datetime from pathlib import Path from unittest import mock @@ -285,6 +286,61 @@ def test_scanpipe_run_model_queryset_methods(self): qs = self.project1.runs.failed() self.assertEqual([failed], list(qs)) + def test_scanpipe_run_model_profile_method(self): + run1 = self.create_run() + + self.assertIsNone(run1.profile()) + + run1.task_output = ( + "Validating your flow...\n" + " The graph looks good!\n" + "Running pylint...\n" + " Pylint is happy!\n" + "2021-01-08 15:44:19.380 Workflow starting (run-id 1):\n" + "2021-01-08 15:44:19.385 [1/start/1 (pid 1)] Task is starting.\n" + "2021-01-08 15:44:20.720 [1/start/1 (pid 1)] Task finished successfully.\n" + "2021-01-08 15:44:20.727 [1/step1/2 (pid 1)] Task is starting.\n" + "2021-01-08 15:44:26.722 [1/step1/2 (pid 1)] Task finished successfully.\n" + "2021-01-08 15:44:26.729 [1/step2/3 (pid 1)] Task is starting.\n" + "2021-01-08 15:44:31.619 [1/step2/3 (pid 1)] Task finished successfully.\n" + "2021-01-08 15:44:31.626 [1/step3/4 (pid 1)] Task is starting.\n" + "2021-01-08 15:44:33.119 [1/step3/4 (pid 1)] Task finished successfully.\n" + "2021-01-08 15:44:38.481 [1/step4/5 (pid 1)] Task is starting.\n" + "2021-01-08 15:54:40.042 [1/step4/5 (pid 1)] Task finished successfully.\n" + "2021-01-08 15:55:04.345 [1/end/13 (pid 1)] Task is starting.\n" + "2021-01-08 15:55:05.651 [1/end/13 (pid 1)] Task finished successfully.\n" + "2021-01-08 15:55:05.652 Done!'" + ) + run1.save() + self.assertIsNone(run1.profile()) + + run1.task_exitcode = 0 + run1.save() + + expected = { + "start": 1, + "step1": 5, + "step2": 4, + "step3": 1, + "step4": 601, + "end": 1, + } + self.assertEqual(expected, run1.profile()) + + output = io.StringIO() + with redirect_stdout(output): + self.assertIsNone(run1.profile(print_results=True)) + + expected = ( + "start 1 seconds 0.2%\n" + "step1 5 seconds 0.8%\n" + "step2 4 seconds 0.7%\n" + "step3 1 seconds 0.2%\n" + "\x1b[41;37mstep4 601 seconds 98.0%\x1b[m\n" + "end 1 seconds 0.2%\n" + ) + self.assertEqual(expected, output.getvalue()) + def test_scanpipe_codebase_resource_model_methods(self): resource = CodebaseResource.objects.create( project=self.project1, path="filename.ext"