Skip to content

Commit

Permalink
vdk-core: Fix regression in job input error classifier (#577)
Browse files Browse the repository at this point in the history
* vdk-core: Fix regression in job input error classifier

As part of #566, a regression
was introduced which caused genuine Platform Errors to be classified as User Errors.

This change rollbacks the previous one, and fixes the regression while doing proper
user code error classification.

Testing Done: Unit tests

Signed-off-by: Andon Andonov <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
2 people authored and mrMoZ1 committed Dec 6, 2021
1 parent 8e31dd3 commit 389e35a
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ def run_job(
raise execution_result.get_exception()
except Exception as e:
errors.log_and_rethrow(
job_input_error_classifier.whom_to_blame(e, __file__),
job_input_error_classifier.whom_to_blame(
e, __file__, data_job_directory
),
log,
what_happened="Failed executing job.",
why_it_happened=errors.MSG_WHY_FROM_EXCEPTION(e),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def run_step(context: JobContext, step: Step) -> StepResult:
except Exception as e:
status = ExecutionStatus.ERROR
details = errors.MSG_WHY_FROM_EXCEPTION(e)
blamee = whom_to_blame(e, __file__)
blamee = whom_to_blame(e, __file__, context.job_directory)
exception = e
errors.log_exception(
blamee,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,23 @@
"""
import os
import traceback
from pathlib import Path
from typing import Optional

from vdk.internal.core import errors


def whom_to_blame(exception, executor_module):
def whom_to_blame(exception, executor_module, data_job_path: Optional[Path] = None):
"""
:param exception: Exception object that has led to Data Job failure.
:param executor_module: name of module that executes User Code.
:param data_job_path: path object of the data job directory.
:return: ResolvableBy.PLATFORM_ERROR if exception was recognized as Platform Team responsibility.
errors.ResolvableBy.USER_ERROR if exception was recognized as User Error.
"""
if isinstance(exception, errors.BaseVdkError):
return errors.find_whom_to_blame_from_exception(exception)
if is_user_error(exception):
if is_user_error(exception, data_job_path):
return errors.ResolvableBy.USER_ERROR
if _is_exception_from_vdk_code(
exception, executor_module
Expand All @@ -32,7 +35,6 @@ def whom_to_blame(exception, executor_module):


def _is_exception_from_vdk_code(exception, executor_module):
exception_in_vdk = False
executor_module = os.path.abspath(executor_module)
vdk_code_directory = os.path.dirname(executor_module)
call_list = traceback.format_tb(exception.__traceback__)
Expand All @@ -43,16 +45,18 @@ def _is_exception_from_vdk_code(exception, executor_module):
for call in call_list:
caller_module = call.split('"')[1] # Extract module path from stacktrace call.
if vdk_code_directory in caller_module and caller_module != executor_module:
exception_in_vdk = True
return True
elif (
caller_module == executor_module
): # User code starts from this module always.
return False
break

return exception_in_vdk
return False


def is_user_error(received_exception: Exception) -> bool:
def is_user_error(
received_exception: Exception, data_job_path: Optional[Path] = None
) -> bool:
"""
Returns if exception should be user error
"""
Expand All @@ -61,6 +65,7 @@ def is_user_error(received_exception: Exception) -> bool:
or _is_new_thread_error(received_exception)
or _is_timeout_error(received_exception)
or _is_memory_limit_exceeded(received_exception)
or _is_direct_user_code_error(received_exception, job_path=data_job_path)
)


Expand Down Expand Up @@ -94,3 +99,22 @@ def _is_timeout_error(exception):
classname_with_package=".*TimeoutError.*",
exception_message_matcher_regex=".*Duration of Data Job exceeded.*seconds.*",
)


def _is_direct_user_code_error(exception: Exception, job_path: Optional[Path]):
data_job_path = str(job_path) if job_path else None
if not data_job_path:
return False

# Get exception traceback as a list
call_list = traceback.format_tb(exception.__traceback__)
if len(call_list) == 0:
return False

last_call = call_list[-1]
last_caller_module = last_call.split('"')[1]

# Check if the data job path is contained in the last exception call from the exception
# traceback. If it is, it is safe to assume that the exception was generated directly in
# user code, and not somewhere else in the vdk code or one of vdk's plugins.
return True if data_job_path in last_caller_module else False
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import traceback
import unittest
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch

Expand Down Expand Up @@ -98,12 +99,44 @@ def test_unknown_generic_error(self, mock_is_user_error, mock_traceback_format_t
errors.ResolvableBy.USER_ERROR,
)

# Test generic errors in user code that are not specifically recognised by VDK.
@patch(f"{traceback.format_tb.__module__}.{traceback.format_tb.__name__}")
def test_unknown_user_code_error(self, mock_traceback_format_tb):
data_job_path = Path("/example_project/my-second-job")
exception = Exception("User Error")

mock_traceback_format_tb.return_value = self.GENERIC_USER_ERROR_STACKTRACE
self.assertEqual(
whom_to_blame(exception, self.EXECUTOR_MODULE),
whom_to_blame(exception, self.EXECUTOR_MODULE, data_job_path),
errors.ResolvableBy.USER_ERROR,
)

# Test errors in user code that cannot be recognised by VDK due to lack of valid job_path.
@patch(f"{traceback.format_tb.__module__}.{traceback.format_tb.__name__}")
def test_unknown_user_code_error_with_none_job_path(self, mock_traceback_format_tb):
data_job_path = None
exception = Exception("Should be Platform Error")

mock_traceback_format_tb.return_value = self.GENERIC_USER_ERROR_STACKTRACE
self.assertEqual(
whom_to_blame(exception, self.EXECUTOR_MODULE, data_job_path),
errors.ResolvableBy.PLATFORM_ERROR,
)

# Generic error thrown by job_input that is not specifically recognised by VDK should be VAC error.
@patch(f"{traceback.format_tb.__module__}.{traceback.format_tb.__name__}")
@patch(f"{is_user_error.__module__}.{is_user_error.__name__}")
def test_job_input_generic_error(
self, mock_is_user_error, mock_traceback_format_tb
):
exception = Exception("!")
mock_is_user_error.return_value = False
mock_traceback_format_tb.return_value = self.PLATFORM_ERROR_STACKTRACE
self.assertEqual(
whom_to_blame(exception, self.EXECUTOR_MODULE),
errors.ResolvableBy.PLATFORM_ERROR,
)


class UserErrorClassification(unittest.TestCase):
def test_user_error_classification(self):
Expand Down

0 comments on commit 389e35a

Please sign in to comment.