diff --git a/projects/vdk-core/src/vdk/internal/builtin_plugins/run/file_based_step.py b/projects/vdk-core/src/vdk/internal/builtin_plugins/run/file_based_step.py index 923312183b..8066258652 100644 --- a/projects/vdk-core/src/vdk/internal/builtin_plugins/run/file_based_step.py +++ b/projects/vdk-core/src/vdk/internal/builtin_plugins/run/file_based_step.py @@ -105,6 +105,7 @@ def run(job_input) finally: if success: log.info("Exiting %s#run(...) SUCCESS" % filename) + errors.resolvable_context().mark_all_resolved() else: log.error("Exiting %s#run(...) FAILURE" % filename) log.warn( diff --git a/projects/vdk-core/src/vdk/internal/builtin_plugins/termination_message/writer.py b/projects/vdk-core/src/vdk/internal/builtin_plugins/termination_message/writer.py index b0864bfeaf..342f9888c2 100644 --- a/projects/vdk-core/src/vdk/internal/builtin_plugins/termination_message/writer.py +++ b/projects/vdk-core/src/vdk/internal/builtin_plugins/termination_message/writer.py @@ -30,8 +30,8 @@ def vdk_configure(self, config_builder: ConfigurationBuilder): @hookimpl def vdk_exit(self, context: CoreContext, exit_code: int): self.write_termination_message( - errors.get_blamee_overall(), # TODO: get this from context - errors.get_blamee_overall_user_error(), # TODO: get this from context + errors.get_blamee_overall(), + errors.get_blamee_overall_user_error(), context.configuration, ) diff --git a/projects/vdk-core/src/vdk/internal/core/errors.py b/projects/vdk-core/src/vdk/internal/core/errors.py index f4a7395eee..fc65047f2e 100644 --- a/projects/vdk-core/src/vdk/internal/core/errors.py +++ b/projects/vdk-core/src/vdk/internal/core/errors.py @@ -10,6 +10,7 @@ """ from __future__ import annotations +import enum import logging import re import sys @@ -18,19 +19,31 @@ from enum import Enum from logging import Logger from types import TracebackType -from typing import Any from typing import cast +log = logging.getLogger(__name__) +MSG_CONSEQUENCE_DELEGATING_TO_CALLER__LIKELY_EXECUTION_FAILURE = ( + "I'm rethrowing this exception to my caller to process." + "Most likely this will result in failure of current Data Job." +) +MSG_CONSEQUENCE_TERMINATING_APP = ( + "The provided Data Job will not be executed. Terminating application." +) +MSG_COUNTERMEASURE_FIX_PARENT_EXCEPTION = ( + "See contents of the exception and fix the problem that causes it." +) + +@enum.unique class ResolvableBy(str, Enum): """ - Type of errors being thrown by VDK during execution of some command based on who is responsible for resolving/fixing them + Type of errors being thrown by VDK during execution of some command. Those are: - * PLATFORM_ERROR - for infrastructure errors that can and should be fixed by SRE Team, Platform team, operating the infrastructure and services. - * USER_ERROR - Errors in user code/configuration, that should be fixed by the end user (or job owner) for example: supplied bad arguments, bug in user code. - * CONFIG_ERROR - Errors in the configuration provided to VDK. Should be fixed by Platform if run in Platfrom infrastructure, or by end user, when run locally. + * PLATFORM_ERROR - infrastructure errors + * USER_ERROR - errors in user code/configuration + * CONFIG_ERROR - errors in the configuration provided to VDK """ PLATFORM_ERROR = "Platform error" @@ -38,15 +51,105 @@ class ResolvableBy(str, Enum): CONFIG_ERROR = "Configuration error" -# TODO: instead of global variable set this in JobContext -# (one benefit this way we can run even multiple jobs/templates in the same process) -# The key is 'blamee' (i.e. responsible for the error fixing) and the value is a list of corresponding ErrorMessages -BLAMEES: dict[str, list[Any]] = defaultdict(list) +@enum.unique +class ResolvableByActual(str, Enum): + """ + Who is responsible for resolving/fixing the error. -# overide when running in kubernetes -CONFIGURATION_ERRORS_ARE_TO_BE_RESOLVED_BY = ResolvableBy.PLATFORM_ERROR + Each Resolvable error type, along with the corresponding accountable: -log = logging.getLogger(__name__) + * PLATFORM_ERROR - should be fixed by the PLATFORM (SRE Team, Platform team, operating the infrastructure and services). + * USER_ERROR - should be fixed by the end USER (or data job owner), for example: supplied bad arguments, bug in user code. + * CONFIG_ERROR that occurred during: + - platform run (in case the data job runs on platfrom infrastructure), is handled by the PLATFORM; + - local run (in case the data job runs on local end user infrastructure), is handled by the USER. + + Returns: + * PLATFORM - accountable for infrastructure errors, or configuration errors occurred during a platform run; + * USER - accountable for errors in user code/configuration, or configuration errors occurred during a local run. + """ + + PLATFORM = "Platform" + USER = "User" + + +# overwrite when running in kubernetes +CONFIGURATION_ERRORS_ARE_TO_BE_RESOLVED_BY = ResolvableByActual.PLATFORM + + +class Resolvable: + """ + Contains context of a resolvable error + resolvable_by: Indicates the resolvable type. + resolvable_by_actual: Who is actually responsible for resolving it + error_message: the error message + exception: the exception related to the error + resolved: indicate if the error is resolved (for example error may be handled in user code and they are considred resolved). + It should be use for informative purposes. It may be None/empty (for example if error originates from a new thread spawned by a job step) + """ + + def __init__( + self, + resolvable_by: ResolvableBy, + resolvable_by_actual: ResolvableByActual, + error_message: ErrorMessage, + exception: BaseException, + resolved: bool = False, + ): + self.resolvable_by = resolvable_by + self.resolvable_by_actual = resolvable_by_actual + self.error_message = error_message + self.exception = exception + self.resolved = resolved + + +class ResolvableContext: + """ + A global registry for resolvable entries lookup, available immediately upon class loading. + Purposed for keeping track of any errors that may occur before, during or after CoreContext/JobContext initialization. + """ + + # The key is 'blamee' (i.e. responsible for the error fixing) and the value is a list of corresponding Resolvables + resolvables: dict[ResolvableByActual, list[Resolvable]] + _instance = None + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance.__init__() + return cls._instance + + @classmethod + def instance(cls): + return cls.__new__(cls) + + def __init__(self): + self.resolvables = defaultdict(list) + + def add(self, resolvable: Resolvable) -> None: + """ + Register a resolvable entry in the context. + """ + resolvable_by_actual = resolvable.resolvable_by_actual + if resolvable_by_actual not in self.resolvables.keys(): + self.resolvables[resolvable_by_actual] = [] + self.resolvables[resolvable_by_actual].append(resolvable) + + def clear(self) -> None: + """ + Clear so far recorded records - those are considered intermediate and resolved. + For example after successful completion of a step. + """ + self.resolvables.clear() + + def mark_all_resolved(self): + for resolvable_by_actual in self.resolvables.values(): + for resolvable in resolvable_by_actual: + resolvable.resolved = True + + +def resolvable_context(): + return ResolvableContext.instance() class BaseVdkError(Exception): @@ -157,30 +260,38 @@ def to_html(self) -> str: return self._to_string(self._get_template("
")) -def get_blamee_overall() -> str | None: +def get_blamee_overall() -> ResolvableByActual | None: """ Finds who is responsible for fixing the error/s. Returns: None - if there were no errors - ResolvableBy.PLATFORM_ERROR - if during the run there were only Platfrom exception - ResolvableBy.USER_ERROR - if during the run there was at least one job owner exceptions + ResolvableByActual.PLATFORM - if during the run there were only Platfrom exceptions + ResolvableByActual.USER - if during the run there was at least one job owner exceptions - The reason it is set to ResolvableBy.USER_ERROR if there is at least one job owner is: + The reason it is set to ResolvableByActual.USER if there is at least one job owner is: VDK defaults the error to be resolved by Platform team until it can determine for certain that it's an issue in the job code. There might be multiple components logging error (for example in exception propagation), and if at least one component says error is in the job code, then we set to be resolved by data job owner. """ - if len(BLAMEES) == 0: + if len(resolvable_context().resolvables) == 0: return None - else: - return ( - ResolvableBy.USER_ERROR - if ResolvableBy.USER_ERROR in BLAMEES - else ResolvableBy.PLATFORM_ERROR - ) + + def filter(resolvable_by_actual): + filtered = [ + i + for i in resolvable_context().resolvables.get(resolvable_by_actual) + if not i.resolved + ] + return resolvable_by_actual if filtered else None + + if ResolvableByActual.USER in resolvable_context().resolvables: + return filter(ResolvableByActual.USER) + + if ResolvableByActual.PLATFORM in resolvable_context().resolvables: + return filter(ResolvableByActual.PLATFORM) def get_blamee_overall_user_error() -> str: @@ -189,84 +300,48 @@ def get_blamee_overall_user_error() -> str: :return: Empty string if the owner is not the overall blamee - An ErrorMessage instance + An ErrorMessage instance to string """ blamee = get_blamee_overall() - if blamee is None or blamee == ResolvableBy.PLATFORM_ERROR: + if blamee is None or blamee != ResolvableByActual.USER: return "" - blamee_errors = BLAMEES.get(blamee, []) - return str(blamee_errors[0]) - - -def get_error_type() -> str | None: - """ - :return: "User" or "Platform" - """ - blamee = get_blamee_overall() - return ( - "User" if blamee == ResolvableBy.USER_ERROR else "Platform" if blamee else None + blamee_user_errors = resolvable_context().resolvables.get( + ResolvableByActual.USER, [] ) + return str(blamee_user_errors[0].error_message) if blamee_user_errors else None -def _build_message_for_end_user( +def log_exception( to_be_fixed_by: ResolvableBy, + log: Logger, what_happened: str, why_it_happened: str, consequences: str, countermeasures: str, -) -> ErrorMessage: - error = "" - if ResolvableBy.PLATFORM_ERROR == to_be_fixed_by: - error = " Platform service error " - elif ResolvableBy.USER_ERROR == to_be_fixed_by: - error = "n error in data job code " - elif ResolvableBy.CONFIG_ERROR == to_be_fixed_by: - error = " configuration error " - - current_error_responsible_for_resolution = _error_type_to_actual_resolver( - to_be_fixed_by - ) - # statement to add the key in the dictionary (if not already there), - # for get_blamee_overall()' to be calculated correctly - BLAMEES[current_error_responsible_for_resolution] - responsible_for_resolution = get_blamee_overall() - - msg = ErrorMessage( - "A{} occurred. The error should be resolved by {}. Here are the details:".format( - error, responsible_for_resolution - ), - what_happened.strip(), - why_it_happened.strip(), - consequences.strip(), - countermeasures.strip(), - ) - - BLAMEES[current_error_responsible_for_resolution].append(msg) - return msg - - -def get_caller_stacktrace() -> str: + exception: BaseException, +) -> None: """ - :return: stacktrace excluding this method (hence caller stacktrace) + Log message only if it has not been logged already. + Does not throw it again. """ - info = sys.exc_info() - tb = cast(TracebackType, info[2]) - f = tb.tb_frame.f_back - lst = ["Traceback (most recent call first):\n"] - fstack = traceback.extract_stack(f) - fstack.reverse() - lst = lst + traceback.format_list(fstack) - lines = "" - for line in lst: - lines = lines + line - return lines + if __error_is_logged(exception): + return + resolvable_by_actual = _error_type_to_actual_resolver(to_be_fixed_by) + error_message = _build_message_for_end_user( + to_be_fixed_by, + resolvable_by_actual, + what_happened, + why_it_happened, + consequences, + countermeasures, + ) + resolvable_context().add( + Resolvable(to_be_fixed_by, resolvable_by_actual, error_message, exception) + ) -def _error_type_to_actual_resolver(to_be_fixed_by: ResolvableBy) -> str: - if ResolvableBy.CONFIG_ERROR == to_be_fixed_by: - return CONFIGURATION_ERRORS_ARE_TO_BE_RESOLVED_BY - else: - return to_be_fixed_by + __set_error_is_logged(exception) + log.exception(error_message) def log_and_throw( @@ -280,70 +355,39 @@ def log_and_throw( """ Log error message and then throw it to be handled up the stack. """ - msg = _build_message_for_end_user( - to_be_fixed_by, what_happened, why_it_happened, consequences, countermeasures - ) - - try: - if ResolvableBy.PLATFORM_ERROR == to_be_fixed_by: - raise PlatformServiceError(msg) - elif ResolvableBy.USER_ERROR == to_be_fixed_by: - raise UserCodeError(msg) - elif ResolvableBy.CONFIG_ERROR == to_be_fixed_by: - raise VdkConfigurationError(msg) - else: - raise Exception( - "BUG! Fix me!" - ) # What type is the error that caused this and whom to blame Platform or Data Jobs Developer? - except BaseVdkError as e: - lines = get_caller_stacktrace() - log.error(str(msg) + "\n" + lines) - __set_error_is_logged(e) - raise - -def log_exception( - to_be_fixed_by: ResolvableBy, - log: Logger, - what_happened: str, - why_it_happened: str, - consequences: str, - countermeasures: str, - exception: BaseException, -) -> None: - """ - Log message only if it has not been logged already. - Does not throw it again. - """ - if __error_is_logged(exception): - return - msg = _build_message_for_end_user( - to_be_fixed_by, what_happened, why_it_happened, consequences, countermeasures + resolvable_by_actual = _error_type_to_actual_resolver(to_be_fixed_by) + error_message = _build_message_for_end_user( + to_be_fixed_by, + resolvable_by_actual, + what_happened, + why_it_happened, + consequences, + countermeasures, ) - __set_error_is_logged(exception) - log.exception(msg) - -def wrap_exception_if_not_already( - to_be_fixed_by: ResolvableBy, msg: ErrorMessage, exception: BaseException -): - if isinstance(exception, BaseVdkError): - # already wrapped - return exception - - # TODO: how to assign cause (new_ex from old_ex) ? + exception: BaseVdkError if ResolvableBy.PLATFORM_ERROR == to_be_fixed_by: - return PlatformServiceError(msg) + exception = PlatformServiceError(error_message) elif ResolvableBy.USER_ERROR == to_be_fixed_by: - return UserCodeError(msg) + exception = UserCodeError(error_message) elif ResolvableBy.CONFIG_ERROR == to_be_fixed_by: - return VdkConfigurationError(msg) + exception = VdkConfigurationError(error_message) else: - log.warning( - "Unknown to_be_fixed_by type. " - "This seems like a bug. We cannot wrap exception and return original one " + raise Exception( + "BUG! Fix me!" + ) # What type is the error that caused this and whom to blame Platform or Data Jobs Developer? + + try: + raise exception + except BaseVdkError as e: + resolvable_context().add( + Resolvable(to_be_fixed_by, resolvable_by_actual, error_message, e) ) - return exception + lines = _get_caller_stacktrace() + log.error(str(error_message) + "\n" + lines) + __set_error_is_logged(e) + raise def log_and_rethrow( @@ -369,39 +413,39 @@ def log_and_rethrow( it will wrap it in corresponding BaseVdkError exception based on to_be_fixed_by parameter """ - msg = _build_message_for_end_user( - to_be_fixed_by, what_happened, why_it_happened, consequences, countermeasures + resolvable_by_actual = _error_type_to_actual_resolver(to_be_fixed_by) + error_message = _build_message_for_end_user( + to_be_fixed_by, + resolvable_by_actual, + what_happened, + why_it_happened, + consequences, + countermeasures, ) + to_be_raised_exception = exception if wrap_in_vdk_error: - to_be_raised_exception = wrap_exception_if_not_already( - to_be_fixed_by, msg, exception + to_be_raised_exception = _wrap_exception_if_not_already( + to_be_fixed_by, error_message, exception ) if not __error_is_logged(exception): - log.exception(msg) + log.exception(error_message) __set_error_is_logged(exception) - if wrap_in_vdk_error: - raise to_be_raised_exception from exception - else: - raise exception - - -def __error_is_logged(exception: BaseException) -> bool: - """Check if exception has custom added attribute is_logged""" - return hasattr(exception, "is_logged") - - -def __set_error_is_logged(exception: BaseException): - setattr(exception, "is_logged", True) + try: + raise to_be_raised_exception from exception if wrap_in_vdk_error else exception + except Exception as e: + resolvable_context().add( + Resolvable(to_be_fixed_by, resolvable_by_actual, error_message, e) + ) + raise def find_whom_to_blame_from_exception(exception: Exception) -> ResolvableBy: """ Tries to determine if it's user or platform error """ - if issubclass(type(exception), UserCodeError): return ResolvableBy.USER_ERROR if issubclass(type(exception), VdkConfigurationError): @@ -410,28 +454,15 @@ def find_whom_to_blame_from_exception(exception: Exception) -> ResolvableBy: ) # TODO find out if this is a local or platform deployment and fix this line. if issubclass(type(exception), PlatformServiceError): return ResolvableBy.PLATFORM_ERROR - return ResolvableBy.PLATFORM_ERROR -MSG_CONSEQUENCE_DELEGATING_TO_CALLER__LIKELY_EXECUTION_FAILURE = ( - "I'm rethrowing this exception to my caller to process." - "Most likely this will result in failure of current Data Job." -) -MSG_CONSEQUENCE_TERMINATING_APP = ( - "The provided Data Job will not be executed. Terminating application." -) -MSG_COUNTERMEASURE_FIX_PARENT_EXCEPTION = ( - "See contents of the exception and fix the problem that causes it." -) - - -def get_exception_message(exception: Exception) -> str: +def _get_exception_message(exception: Exception) -> str: """Returns the message part of an exception as string""" return str(exception).strip() -class CustomMessageExceptionDecorator: +class _CustomMessageExceptionDecorator: """ Provides custom message for an exception. @@ -473,13 +504,14 @@ def MSG_WHY_FROM_EXCEPTION(exception: Exception) -> str: """ Try to figure what is the reason for the failure (why) from the exception and return as a reason. """ - custom_message = CustomMessageExceptionDecorator(exception).get_custom_message() - if custom_message: - return custom_message - else: - return "An exception occurred, exception message was: {}".format( - get_exception_message(exception) + custom_message = _CustomMessageExceptionDecorator(exception).get_custom_message() + return ( + custom_message + if custom_message + else "An exception occurred, exception message was: {}".format( + _get_exception_message(exception) ) + ) def exception_matches( @@ -503,7 +535,7 @@ def exception_matches( if not (grp == classname): return False - msg = get_exception_message(e) + msg = _get_exception_message(e) match = msgMatcher.match(msg) if None is match: return False @@ -511,11 +543,91 @@ def exception_matches( return grp == msg -def clear_intermediate_errors() -> None: - """ - Clear so far recorded records - those are considered intermediate and resolved. +def _build_message_for_end_user( + to_be_fixed_by: ResolvableBy, + to_be_fixed_by_actual: ResolvableByActual, + what_happened: str, + why_it_happened: str, + consequences: str, + countermeasures: str, +) -> ErrorMessage: + error = "" + if ResolvableBy.PLATFORM_ERROR == to_be_fixed_by: + error = " Platform service error " + elif ResolvableBy.USER_ERROR == to_be_fixed_by: + error = "n error in data job code " + elif ResolvableBy.CONFIG_ERROR == to_be_fixed_by: + error = " configuration error " + + return ErrorMessage( + "A{} occurred. The error should be resolved by {}. Here are the details:".format( + error, to_be_fixed_by_actual + ), + what_happened.strip(), + why_it_happened.strip(), + consequences.strip(), + countermeasures.strip(), + ) - For example after successful completion of a step. - # TODO: better keep errors in context and not globally! + +def _get_caller_stacktrace(exception: BaseException = None) -> str: """ - BLAMEES.clear() + :return: stacktrace excluding this method (hence caller stacktrace) + """ + tb = ( + exception.__traceback__ + if exception and exception.__traceback__ + else cast(TracebackType, sys.exc_info()[2]) + ) + f = tb.tb_frame.f_back + lst = ["Traceback (most recent call first):\n"] + fstack = traceback.extract_stack(f) + fstack.reverse() + lst = lst + traceback.format_list(fstack) + lines = "" + for line in lst: + lines = lines + line + return lines + + +def _error_type_to_actual_resolver(to_be_fixed_by: ResolvableBy) -> ResolvableByActual: + if ResolvableBy.PLATFORM_ERROR == to_be_fixed_by: + return ResolvableByActual.PLATFORM + if ResolvableBy.USER_ERROR == to_be_fixed_by: + return ResolvableByActual.USER + if ResolvableBy.CONFIG_ERROR == to_be_fixed_by: + return CONFIGURATION_ERRORS_ARE_TO_BE_RESOLVED_BY + raise Exception( + "BUG! Fix me!" + ) # What type is the error that caused this and whom to blame, Platform or Data Jobs Developer? + + +def _wrap_exception_if_not_already( + to_be_fixed_by: ResolvableBy, msg: ErrorMessage, exception: BaseException +): + if isinstance(exception, BaseVdkError): + # already wrapped + return exception + + # TODO: how to assign cause (new_ex from old_ex) ? + if ResolvableBy.PLATFORM_ERROR == to_be_fixed_by: + return PlatformServiceError(msg) + elif ResolvableBy.USER_ERROR == to_be_fixed_by: + return UserCodeError(msg) + elif ResolvableBy.CONFIG_ERROR == to_be_fixed_by: + return VdkConfigurationError(msg) + else: + log.warning( + "Unknown to_be_fixed_by type. " + "This seems like a bug. We cannot wrap exception and return original one " + ) + return exception + + +def __error_is_logged(exception: BaseException) -> bool: + """Check if exception has custom added attribute is_logged""" + return hasattr(exception, "is_logged") + + +def __set_error_is_logged(exception: BaseException): + setattr(exception, "is_logged", True) diff --git a/projects/vdk-core/tests/functional/run/jobs/simple-query-failed-handled/10_query_then_handle_exceptions.py b/projects/vdk-core/tests/functional/run/jobs/simple-query-failed-handled/10_query_then_handle_exceptions.py new file mode 100644 index 0000000000..763f9346f6 --- /dev/null +++ b/projects/vdk-core/tests/functional/run/jobs/simple-query-failed-handled/10_query_then_handle_exceptions.py @@ -0,0 +1,10 @@ +# Copyright 2021 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 +from vdk.api.job_input import IJobInput + + +def run(job_input: IJobInput): + try: + job_input.execute_query("SELECT Syntax error") + except: + pass # exception handled by user diff --git a/projects/vdk-core/tests/functional/run/test_run_errors.py b/projects/vdk-core/tests/functional/run/test_run_errors.py index 601c22ea3d..8ad2598e6b 100644 --- a/projects/vdk-core/tests/functional/run/test_run_errors.py +++ b/projects/vdk-core/tests/functional/run/test_run_errors.py @@ -14,10 +14,15 @@ from vdk.internal.builtin_plugins.connection.execution_cursor import ExecutionCursor from vdk.internal.builtin_plugins.run.job_context import JobContext from vdk.internal.core import errors +from vdk.internal.core.errors import ResolvableByActual from vdk.plugin.test_utils.util_funcs import cli_assert_equal from vdk.plugin.test_utils.util_funcs import CliEntryBasedTestRunner +from vdk.plugin.test_utils.util_plugins import DB_TYPE_SQLITE_MEMORY +from vdk.plugin.test_utils.util_plugins import SqLite3MemoryDbPlugin from vdk.plugin.test_utils.util_plugins import TestPropertiesPlugin +VDK_DB_DEFAULT_TYPE = "VDK_DB_DEFAULT_TYPE" + @pytest.fixture(autouse=True) def tmp_termination_msg_file(tmpdir) -> pathlib.Path: @@ -33,7 +38,7 @@ def tmp_termination_msg_file(tmpdir) -> pathlib.Path: def test_initialize_step_user_error(tmp_termination_msg_file): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() runner = CliEntryBasedTestRunner() result: Result = runner.invoke(["run", util.job_path("syntax-error-job")]) @@ -42,16 +47,22 @@ def test_initialize_step_user_error(tmp_termination_msg_file): def test_run_user_error(tmp_termination_msg_file): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() runner = CliEntryBasedTestRunner() result: Result = runner.invoke(["run", util.job_path("fail-job")]) cli_assert_equal(1, result) assert _get_job_status(tmp_termination_msg_file) == "User error" + assert errors.get_blamee_overall() == ResolvableByActual.USER + actual_error = errors.resolvable_context().resolvables.get(ResolvableByActual.USER)[ + 0 + ] + assert actual_error.resolved is False + def test_run_user_error_fail_job_library(tmp_termination_msg_file): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() runner = CliEntryBasedTestRunner() result: Result = runner.invoke(["run", util.job_path("fail-job-indirect-library")]) @@ -60,7 +71,7 @@ def test_run_user_error_fail_job_library(tmp_termination_msg_file): def test_run_user_error_fail_job_ingest_iterator(tmp_termination_msg_file): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() runner = CliEntryBasedTestRunner() result: Result = runner.invoke(["run", util.job_path("fail-job-ingest-iterator")]) @@ -69,7 +80,7 @@ def test_run_user_error_fail_job_ingest_iterator(tmp_termination_msg_file): def test_run_init_fails(tmp_termination_msg_file: pathlib.Path): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() class InitFailsPlugin: @staticmethod @@ -83,9 +94,15 @@ def initialize_job(self, context: JobContext): cli_assert_equal(1, result) assert _get_job_status(tmp_termination_msg_file) == "Platform error" + assert errors.get_blamee_overall() == ResolvableByActual.PLATFORM + actual_error = errors.resolvable_context().resolvables.get( + ResolvableByActual.PLATFORM + )[0] + assert actual_error.resolved is False + def test_run_exception_handled(tmp_termination_msg_file: pathlib.Path): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() class ExceptionHandler: @staticmethod @@ -100,7 +117,7 @@ def vdk_exception(self, exception: Exception) -> bool: def test_run_job_plugin_fails(tmp_termination_msg_file): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() class RunJobFailsPlugin: @staticmethod @@ -116,7 +133,7 @@ def run_job(context: JobContext) -> None: def test_run_platform_error_properties(tmp_termination_msg_file): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() class FailingPropertiesServiceClient(IPropertiesServiceClient): def read_properties(self, job_name: str, team_name: str) -> Dict: @@ -139,7 +156,7 @@ def write_properties( def test_run_platform_error_sql(tmp_termination_msg_file): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() class QueryFailingPlugin: @hookimpl @@ -155,3 +172,16 @@ def db_connection_execute_operation(execution_cursor: ExecutionCursor): def _get_job_status(tmp_termination_msg_file): return json.loads(tmp_termination_msg_file.read_text())["status"] + + +@mock.patch.dict(os.environ, {VDK_DB_DEFAULT_TYPE: DB_TYPE_SQLITE_MEMORY}) +def test_user_error_handled(tmp_termination_msg_file): + errors.resolvable_context().clear() + db_plugin = SqLite3MemoryDbPlugin() + runner = CliEntryBasedTestRunner(db_plugin) + + result: Result = runner.invoke( + ["run", util.job_path("simple-query-failed-handled")] + ) + cli_assert_equal(0, result) + assert (json.loads(tmp_termination_msg_file.read_text())["status"]) == "Success" diff --git a/projects/vdk-core/tests/functional/run/test_run_notifications.py b/projects/vdk-core/tests/functional/run/test_run_notifications.py index 7b0a1e0b0c..dcc465af67 100644 --- a/projects/vdk-core/tests/functional/run/test_run_notifications.py +++ b/projects/vdk-core/tests/functional/run/test_run_notifications.py @@ -34,7 +34,7 @@ def __get_smtp_env(smtpd: SMTPDFix): def test_run_successfull(smtpd: SMTPDFix): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() with mock.patch.dict( os.environ, { @@ -54,7 +54,7 @@ def test_run_successfull(smtpd: SMTPDFix): def test_run_successfull_notify_multiple_users(smtpd: SMTPDFix): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() with mock.patch.dict( os.environ, { @@ -77,7 +77,7 @@ def test_run_successfull_notify_multiple_users(smtpd: SMTPDFix): @mock.patch.dict(os.environ, {"VDK_DB_DEFAULT_TYPE": DB_TYPE_SQLITE_MEMORY}) def test_run_query_failed_user_error_notification_sent(smtpd: SMTPDFix): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() db_plugin = DecoratedSqLite3MemoryDbPlugin() runner = CliEntryBasedTestRunner(db_plugin) @@ -98,7 +98,7 @@ def test_run_query_failed_user_error_notification_sent(smtpd: SMTPDFix): @mock.patch.dict(os.environ, {"VDK_DB_DEFAULT_TYPE": DB_TYPE_SQLITE_MEMORY}) def test_run_query_failed_user_error_no_notification_configured(smtpd: SMTPDFix): - errors.clear_intermediate_errors() + errors.resolvable_context().clear() db_plugin = DecoratedSqLite3MemoryDbPlugin() runner = CliEntryBasedTestRunner(db_plugin) diff --git a/projects/vdk-core/tests/vdk/internal/core/test_errors.py b/projects/vdk-core/tests/vdk/internal/core/test_errors.py index 04b7b7b848..3033074096 100644 --- a/projects/vdk-core/tests/vdk/internal/core/test_errors.py +++ b/projects/vdk-core/tests/vdk/internal/core/test_errors.py @@ -15,57 +15,83 @@ class ErrorsTest(unittest.TestCase): - def setUp(self): - errors.BLAMEES = defaultdict(list) - def tearDown(self): - errors.BLAMEES = defaultdict(list) + errors.resolvable_context().clear() + + def test_resolvable_context_singleton(self): + assert ( + errors.resolvable_context() + is errors.ResolvableContext() + is errors.resolvable_context() + is errors.ResolvableContext() + ) def test_get_blamee_overall_none(self): blamee = errors.get_blamee_overall() self.assertEqual(blamee, None, "There are no errors") def test_get_blamee_overall_platform(self): - errors._build_message_for_end_user( - errors.ResolvableBy.PLATFORM_ERROR, - what_happened="something happened", - why_it_happened="...", - consequences="XYZ", - countermeasures="Think! SRE", - ) - blamee = errors.get_blamee_overall() + try: + raise Exception() + except Exception as e: + errors.log_exception( + errors.ResolvableBy.PLATFORM_ERROR, + log, + what_happened="something happened", + why_it_happened="...", + consequences="XYZ", + countermeasures="Think! SRE", + exception=e, + ) self.assertEqual( - blamee, errors.ResolvableBy.PLATFORM_ERROR, "Platform exception" + errors.get_blamee_overall(), errors.ResolvableByActual.PLATFORM, "Platform" ) def test_get_blamee_overall_owner(self): - errors._build_message_for_end_user( - errors.ResolvableBy.USER_ERROR, - what_happened="something happened", - why_it_happened="...", - consequences="XYZ", - countermeasures="Think! Owner", + try: + raise Exception() + except Exception as e: + errors.log_exception( + errors.ResolvableBy.USER_ERROR, + log, + what_happened="something happened", + why_it_happened="...", + consequences="XYZ", + countermeasures="Think! Owner", + exception=e, + ) + self.assertEqual( + errors.get_blamee_overall(), errors.ResolvableByActual.USER, "User" ) - blamee = errors.get_blamee_overall() - self.assertEqual(blamee, errors.ResolvableBy.USER_ERROR, "Owner exception") def test_get_blamee_overall_both(self): - errors._build_message_for_end_user( - errors.ResolvableBy.PLATFORM_ERROR, - what_happened="something happened", - why_it_happened="...", - consequences="XYZ", - countermeasures="Think! SRE", - ) - errors._build_message_for_end_user( - errors.ResolvableBy.USER_ERROR, - what_happened="something happened", - why_it_happened="...", - consequences="XYZ", - countermeasures="Think! Owner", + try: + raise Exception() + except Exception as e: + errors.log_exception( + errors.ResolvableBy.PLATFORM_ERROR, + log, + what_happened="something happened", + why_it_happened="...", + consequences="XYZ", + countermeasures="Think! SRE", + exception=e, + ) + try: + raise Exception() + except Exception as e: + errors.log_exception( + errors.ResolvableBy.USER_ERROR, + log, + what_happened="something happened", + why_it_happened="...", + consequences="XYZ", + countermeasures="Think! Owner", + exception=e, + ) + self.assertEqual( + errors.get_blamee_overall(), errors.ResolvableByActual.USER, "User" ) - blamee = errors.get_blamee_overall() - self.assertEqual(blamee, errors.ResolvableBy.USER_ERROR, "Owner exception") def test_throws_correct_type(self): with self.assertRaises(errors.BaseVdkError) as context: