Skip to content

Commit

Permalink
Add a profile() method on the Run model for profiling pipeline execut…
Browse files Browse the repository at this point in the history
…ion #73

Signed-off-by: Thomas Druez <[email protected]>
  • Loading branch information
tdruez authored Jan 22, 2021
1 parent 1b3ed5d commit bde0513
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 1 deletion.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

- Add support for scancode pipes and Project name with whitespaces

- Add a profile() method on the Run model for profiling pipeline execution
https://github.com/nexB/scancode.io/issues/73

### v1.0.6 (2020-12-23)

- Add a management command to delete a Project and its related work directories
Expand Down
39 changes: 39 additions & 0 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import traceback
import uuid
from contextlib import suppress
from datetime import datetime
from pathlib import Path

from django.core import checks
Expand Down Expand Up @@ -498,6 +499,44 @@ def get_run_id(self):
if match:
return match.group("run_id")

def profile(self, print_results=False):
"""
Return computed execution times for each steps of this Run.
If `print_results` is provided, the results are printed to stdout.
"""
if not self.task_succeeded:
return

profiler = {}
for line in self.task_output.split("\n"):
if not line.endswith(("starting.", "successfully.")):
continue

segments = line.split()
line_date_str = " ".join(segments[0:2])
line_date = datetime.strptime(line_date_str, "%Y-%m-%d %H:%M:%S.%f")
step = segments[2].split("/")[1]

if line.endswith("starting."):
profiler[step] = line_date
elif line.endswith("successfully."):
start_date = profiler[step]
profiler[step] = (line_date - start_date).seconds

if not print_results:
return profiler

total_run_time = sum(profiler.values())
padding = max(len(name) for name in profiler.keys()) + 1
for step, step_execution_time in profiler.items():
percent = round(step_execution_time * 100 / total_run_time, 1)
output_str = f"{step:{padding}} {step_execution_time:>3} seconds {percent}%"
if percent > 50:
print("\033[41;37m" + output_str + "\033[m")
else:
print(output_str)


class CodebaseResourceQuerySet(ProjectRelatedQuerySet):
def status(self, status=None):
Expand Down
58 changes: 57 additions & 1 deletion scanpipe/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

import io
import tempfile
import uuid
from contextlib import redirect_stdout
from datetime import datetime
from pathlib import Path
from unittest import mock
Expand Down Expand Up @@ -285,6 +286,61 @@ def test_scanpipe_run_model_queryset_methods(self):
qs = self.project1.runs.failed()
self.assertEqual([failed], list(qs))

def test_scanpipe_run_model_profile_method(self):
run1 = self.create_run()

self.assertIsNone(run1.profile())

run1.task_output = (
"Validating your flow...\n"
" The graph looks good!\n"
"Running pylint...\n"
" Pylint is happy!\n"
"2021-01-08 15:44:19.380 Workflow starting (run-id 1):\n"
"2021-01-08 15:44:19.385 [1/start/1 (pid 1)] Task is starting.\n"
"2021-01-08 15:44:20.720 [1/start/1 (pid 1)] Task finished successfully.\n"
"2021-01-08 15:44:20.727 [1/step1/2 (pid 1)] Task is starting.\n"
"2021-01-08 15:44:26.722 [1/step1/2 (pid 1)] Task finished successfully.\n"
"2021-01-08 15:44:26.729 [1/step2/3 (pid 1)] Task is starting.\n"
"2021-01-08 15:44:31.619 [1/step2/3 (pid 1)] Task finished successfully.\n"
"2021-01-08 15:44:31.626 [1/step3/4 (pid 1)] Task is starting.\n"
"2021-01-08 15:44:33.119 [1/step3/4 (pid 1)] Task finished successfully.\n"
"2021-01-08 15:44:38.481 [1/step4/5 (pid 1)] Task is starting.\n"
"2021-01-08 15:54:40.042 [1/step4/5 (pid 1)] Task finished successfully.\n"
"2021-01-08 15:55:04.345 [1/end/13 (pid 1)] Task is starting.\n"
"2021-01-08 15:55:05.651 [1/end/13 (pid 1)] Task finished successfully.\n"
"2021-01-08 15:55:05.652 Done!'"
)
run1.save()
self.assertIsNone(run1.profile())

run1.task_exitcode = 0
run1.save()

expected = {
"start": 1,
"step1": 5,
"step2": 4,
"step3": 1,
"step4": 601,
"end": 1,
}
self.assertEqual(expected, run1.profile())

output = io.StringIO()
with redirect_stdout(output):
self.assertIsNone(run1.profile(print_results=True))

expected = (
"start 1 seconds 0.2%\n"
"step1 5 seconds 0.8%\n"
"step2 4 seconds 0.7%\n"
"step3 1 seconds 0.2%\n"
"\x1b[41;37mstep4 601 seconds 98.0%\x1b[m\n"
"end 1 seconds 0.2%\n"
)
self.assertEqual(expected, output.getvalue())

def test_scanpipe_codebase_resource_model_methods(self):
resource = CodebaseResource.objects.create(
project=self.project1, path="filename.ext"
Expand Down

0 comments on commit bde0513

Please sign in to comment.