From 656ba0c5f5e1ebbd7d2a777651bb2e0e00be9b78 Mon Sep 17 00:00:00 2001
From: Dilyan Marinov <91800778+DeltaMichael@users.noreply.github.com>
Date: Thu, 1 Feb 2024 16:56:53 +0200
Subject: [PATCH] vdk-core: relevant info in step result (#3062)

## Why?

Sometimes the exact errors in user code are hard to find in the stack
trace

## What?

Print relevant information from stack trace in step results

Example:

On error in step 20_python_step.py, the short result summary looks like
this

```
2024-01-29 15:14:14,736 [VDK] hello-world [INFO ] vdk.internal.builtin_plugins.r           cli_run.py:166  __log_short_exec[id:3a22ec2e-d6e6-4f94-96c0-122955930cbb-1706534052-abffd]- Job execution result: ERROR
Step results:
10_sql_step.sql - SUCCESS
20_python_step.py - ERROR
  File "/Users/mdilyan/Projects/vdk-playground/hello-world/20_python_step.py", line 27, in run
    raise ValueError("No value in this")
ValueError: No value in this
```

## How was this tested?

Ran locally
Functional test

## What kind of change is this?

Feature/non-breaking

Signed-off-by: Dilyan Marinov <mdilyan@vmware.com>
Co-authored-by: Dilyan Marinov <mdilyan@vmware.com>
---
 .../internal/builtin_plugins/run/cli_run.py   | 33 +++++++++++++++++--
 .../tests/functional/run/test_run_errors.py   | 15 +++++++++
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/projects/vdk-core/src/vdk/internal/builtin_plugins/run/cli_run.py b/projects/vdk-core/src/vdk/internal/builtin_plugins/run/cli_run.py
index cf03d6a05e..407fbbc83f 100644
--- a/projects/vdk-core/src/vdk/internal/builtin_plugins/run/cli_run.py
+++ b/projects/vdk-core/src/vdk/internal/builtin_plugins/run/cli_run.py
@@ -7,6 +7,7 @@
 import pathlib
 import re
 import sys
+import traceback
 from typing import cast
 from typing import Dict
 from typing import List
@@ -18,6 +19,7 @@
 from vdk.internal.builtin_plugins.run import job_input_error_classifier
 from vdk.internal.builtin_plugins.run.data_job import DataJobFactory
 from vdk.internal.builtin_plugins.run.execution_results import ExecutionResult
+from vdk.internal.builtin_plugins.run.execution_results import StepResult
 from vdk.internal.builtin_plugins.run.execution_tracking import (
     ExecutionTrackingPlugin,
 )
@@ -143,15 +145,40 @@ def __log_exec_result(self, execution_result: ExecutionResult) -> None:
             log.info(f"Data Job execution summary: {execution_result}")
 
     @staticmethod
-    def __log_short_exec_result(execution_result):
+    def __log_short_exec_result(execution_result: ExecutionResult):
+        def extract_relevant_lines(step: StepResult) -> List[str]:
+            out = []
+            if step.exception:
+                call_list = traceback.format_tb(step.exception.__traceback__)
+
+                for line_index, line in enumerate(call_list):
+                    # Check if the step name is in the line
+                    if step.name in line:
+                        out.append(line)
+                        next_line_index = line_index + 1
+                        # Pull in subsequent relevant lines
+                        # that do not come from another file
+                        while next_line_index < len(call_list) and not re.match(
+                            r'^\s*File "', call_list[next_line_index]
+                        ):
+                            out.append(call_list[next_line_index])
+                            next_line_index += 1
+                # add the exception type and message
+                out.append(f"{type(step.exception).__name__}: {str(step.exception)}")
+            return out
+
         log.info(
             "Job execution result: "
             + execution_result.status.upper()
             + "\n"
-            + "Steps list:\n"
+            + "Step results:\n"
             + "".join(
                 [
-                    step.name + " - " + step.status.upper() + "\n"
+                    step.name
+                    + " - "
+                    + step.status.upper()
+                    + "\n"
+                    + "".join(extract_relevant_lines(step))
                     for step in execution_result.steps_list
                 ]
             )
diff --git a/projects/vdk-core/tests/functional/run/test_run_errors.py b/projects/vdk-core/tests/functional/run/test_run_errors.py
index 8ef6ff2839..27985a748f 100644
--- a/projects/vdk-core/tests/functional/run/test_run_errors.py
+++ b/projects/vdk-core/tests/functional/run/test_run_errors.py
@@ -229,3 +229,18 @@ def test_error_from_pandas_user_error(tmp_termination_msg_file):
         assert _get_job_status(tmp_termination_msg_file) == "User error"
         assert '"blamee": "User Error"' in result.output
         assert '"exception_name": "KeyError"' in result.output
+
+
+def test_run_simple_job_summary(tmp_termination_msg_file):
+    runner = CliEntryBasedTestRunner()
+
+    result: Result = runner.invoke(["run", util.job_path("fail-job")])
+    cli_assert_equal(1, result)
+    expected = f"""
+Step results:
+1_step.py - ERROR
+  File "{os.getcwd()}/tests/functional/run/jobs/fail-job/1_step.py", line 7, in run
+    raise ArithmeticError("cannot do math :(")
+ArithmeticError: cannot do math :(
+"""
+    assert expected in result.output