From bde05131587916c89286d424c3bbfb799e880489 Mon Sep 17 00:00:00 2001
From: tdruez <489057+tdruez@users.noreply.github.com>
Date: Fri, 22 Jan 2021 13:38:42 +0400
Subject: [PATCH] Add a profile() method on the Run model for profiling
 pipeline execution #73

Signed-off-by: Thomas Druez <tdruez@nexb.com>
---
 CHANGELOG.rst                 |  3 ++
 scanpipe/models.py            | 39 +++++++++++++++++++++++
 scanpipe/tests/test_models.py | 58 ++++++++++++++++++++++++++++++++++-
 3 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index ffb38bf88..b2dd3e2f8 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -5,6 +5,9 @@
 
 - Add support for scancode pipes and Project name with whitespaces
 
+- Add a profile() method on the Run model for profiling pipeline execution
+  https://github.com/nexB/scancode.io/issues/73
+
 ### v1.0.6 (2020-12-23)
 
 - Add a management command to delete a Project and its related work directories
diff --git a/scanpipe/models.py b/scanpipe/models.py
index 4b9dbffd8..5624b448a 100644
--- a/scanpipe/models.py
+++ b/scanpipe/models.py
@@ -25,6 +25,7 @@
 import traceback
 import uuid
 from contextlib import suppress
+from datetime import datetime
 from pathlib import Path
 
 from django.core import checks
@@ -498,6 +499,44 @@ def get_run_id(self):
             if match:
                 return match.group("run_id")
 
+    def profile(self, print_results=False):
+        """
+        Return computed execution times for each steps of this Run.
+
+        If `print_results` is provided, the results are printed to stdout.
+        """
+        if not self.task_succeeded:
+            return
+
+        profiler = {}
+        for line in self.task_output.split("\n"):
+            if not line.endswith(("starting.", "successfully.")):
+                continue
+
+            segments = line.split()
+            line_date_str = " ".join(segments[0:2])
+            line_date = datetime.strptime(line_date_str, "%Y-%m-%d %H:%M:%S.%f")
+            step = segments[2].split("/")[1]
+
+            if line.endswith("starting."):
+                profiler[step] = line_date
+            elif line.endswith("successfully."):
+                start_date = profiler[step]
+                profiler[step] = (line_date - start_date).seconds
+
+        if not print_results:
+            return profiler
+
+        total_run_time = sum(profiler.values())
+        padding = max(len(name) for name in profiler.keys()) + 1
+        for step, step_execution_time in profiler.items():
+            percent = round(step_execution_time * 100 / total_run_time, 1)
+            output_str = f"{step:{padding}} {step_execution_time:>3} seconds {percent}%"
+            if percent > 50:
+                print("\033[41;37m" + output_str + "\033[m")
+            else:
+                print(output_str)
+
 
 class CodebaseResourceQuerySet(ProjectRelatedQuerySet):
     def status(self, status=None):
diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py
index 743861562..6d286a31e 100644
--- a/scanpipe/tests/test_models.py
+++ b/scanpipe/tests/test_models.py
@@ -19,9 +19,10 @@
 #
 # ScanCode.io is a free software code scanning tool from nexB Inc. and others.
 # Visit https://github.com/nexB/scancode.io for support and download.
-
+import io
 import tempfile
 import uuid
+from contextlib import redirect_stdout
 from datetime import datetime
 from pathlib import Path
 from unittest import mock
@@ -285,6 +286,61 @@ def test_scanpipe_run_model_queryset_methods(self):
         qs = self.project1.runs.failed()
         self.assertEqual([failed], list(qs))
 
+    def test_scanpipe_run_model_profile_method(self):
+        run1 = self.create_run()
+
+        self.assertIsNone(run1.profile())
+
+        run1.task_output = (
+            "Validating your flow...\n"
+            "    The graph looks good!\n"
+            "Running pylint...\n"
+            "    Pylint is happy!\n"
+            "2021-01-08 15:44:19.380 Workflow starting (run-id 1):\n"
+            "2021-01-08 15:44:19.385 [1/start/1 (pid 1)] Task is starting.\n"
+            "2021-01-08 15:44:20.720 [1/start/1 (pid 1)] Task finished successfully.\n"
+            "2021-01-08 15:44:20.727 [1/step1/2 (pid 1)] Task is starting.\n"
+            "2021-01-08 15:44:26.722 [1/step1/2 (pid 1)] Task finished successfully.\n"
+            "2021-01-08 15:44:26.729 [1/step2/3 (pid 1)] Task is starting.\n"
+            "2021-01-08 15:44:31.619 [1/step2/3 (pid 1)] Task finished successfully.\n"
+            "2021-01-08 15:44:31.626 [1/step3/4 (pid 1)] Task is starting.\n"
+            "2021-01-08 15:44:33.119 [1/step3/4 (pid 1)] Task finished successfully.\n"
+            "2021-01-08 15:44:38.481 [1/step4/5 (pid 1)] Task is starting.\n"
+            "2021-01-08 15:54:40.042 [1/step4/5 (pid 1)] Task finished successfully.\n"
+            "2021-01-08 15:55:04.345 [1/end/13 (pid 1)] Task is starting.\n"
+            "2021-01-08 15:55:05.651 [1/end/13 (pid 1)] Task finished successfully.\n"
+            "2021-01-08 15:55:05.652 Done!'"
+        )
+        run1.save()
+        self.assertIsNone(run1.profile())
+
+        run1.task_exitcode = 0
+        run1.save()
+
+        expected = {
+            "start": 1,
+            "step1": 5,
+            "step2": 4,
+            "step3": 1,
+            "step4": 601,
+            "end": 1,
+        }
+        self.assertEqual(expected, run1.profile())
+
+        output = io.StringIO()
+        with redirect_stdout(output):
+            self.assertIsNone(run1.profile(print_results=True))
+
+        expected = (
+            "start    1 seconds 0.2%\n"
+            "step1    5 seconds 0.8%\n"
+            "step2    4 seconds 0.7%\n"
+            "step3    1 seconds 0.2%\n"
+            "\x1b[41;37mstep4  601 seconds 98.0%\x1b[m\n"
+            "end      1 seconds 0.2%\n"
+        )
+        self.assertEqual(expected, output.getvalue())
+
     def test_scanpipe_codebase_resource_model_methods(self):
         resource = CodebaseResource.objects.create(
             project=self.project1, path="filename.ext"