From 9bc590b1fc17bf3c6e33d6b46e4231d5e252526b Mon Sep 17 00:00:00 2001 From: David Leong <116610336+leongdl@users.noreply.github.com> Date: Wed, 10 Jul 2024 15:53:55 -0700 Subject: [PATCH] feat: record Success-Fail telemetry event on job attachments upload. (#393) Signed-off-by: David Leong <116610336+leongdl@users.noreply.github.com> --- src/deadline/client/api/__init__.py | 6 +- src/deadline/client/api/_submit_job_bundle.py | 3 +- src/deadline/client/api/_telemetry.py | 37 ++++++++++- src/deadline/client/cli/_groups/job_group.py | 25 +++++-- .../ui/dialogs/submit_job_progress_dialog.py | 1 + .../deadline_client/api/test_api_telemetry.py | 65 ++++++++++++++++++- .../api/test_job_bundle_submission.py | 3 +- 7 files changed, 128 insertions(+), 12 deletions(-) diff --git a/src/deadline/client/api/__init__.py b/src/deadline/client/api/__init__.py index 58383c43..2d8a5b55 100644 --- a/src/deadline/client/api/__init__.py +++ b/src/deadline/client/api/__init__.py @@ -23,6 +23,7 @@ "get_telemetry_client", "get_deadline_cloud_library_telemetry_client", "get_storage_profile_for_queue", + "record_success_fail_telemetry_event", ] # The following import is needed to prevent the following sporadic failure: @@ -52,12 +53,15 @@ list_storage_profiles_for_queue, ) from ._queue_parameters import get_queue_parameter_definitions -from ._submit_job_bundle import create_job_from_job_bundle, wait_for_create_job_to_complete + +# Telemetry must be imported before Submit Job Bundle to avoid circular dependencies. from ._telemetry import ( get_telemetry_client, get_deadline_cloud_library_telemetry_client, TelemetryClient, + record_success_fail_telemetry_event, ) +from ._submit_job_bundle import create_job_from_job_bundle, wait_for_create_job_to_complete from ._get_storage_profile_for_queue import get_storage_profile_for_queue logger = getLogger(__name__) diff --git a/src/deadline/client/api/_submit_job_bundle.py b/src/deadline/client/api/_submit_job_bundle.py index 31f08d04..592a1e0a 100644 --- a/src/deadline/client/api/_submit_job_bundle.py +++ b/src/deadline/client/api/_submit_job_bundle.py @@ -285,7 +285,7 @@ def create_job_from_job_bundle( hashing_progress_callback=hashing_progress_callback, ) - attachment_settings = _upload_attachments( + attachment_settings = _upload_attachments( # type: ignore asset_manager, asset_manifests, print_function_callback, upload_progress_callback ) attachment_settings["fileSystem"] = JobAttachmentsFileSystem( @@ -432,6 +432,7 @@ def _default_update_hash_progress(hashing_metadata: Dict[str, str]) -> bool: return hashing_summary, manifests +@api.record_success_fail_telemetry_event(metric_name="cli_asset_upload") # type: ignore def _upload_attachments( asset_manager: S3AssetManager, manifests: List[AssetRootManifest], diff --git a/src/deadline/client/api/_telemetry.py b/src/deadline/client/api/_telemetry.py index 41ad946f..5c384675 100644 --- a/src/deadline/client/api/_telemetry.py +++ b/src/deadline/client/api/_telemetry.py @@ -14,7 +14,7 @@ from datetime import datetime from queue import Queue, Full from threading import Thread -from typing import Any, Dict, Optional +from typing import Any, Callable, Dict, List, Optional, TypeVar, cast from urllib import request, error from ...job_attachments.progress_tracker import SummaryStatistics @@ -32,6 +32,10 @@ logger = logging.getLogger(__name__) +# Generic function return type. +F = TypeVar("F", bound=Callable[..., Any]) + + def get_deadline_endpoint_url( config: Optional[ConfigParser] = None, ) -> str: @@ -349,3 +353,34 @@ def get_deadline_cloud_library_telemetry_client( :return: Telemetry client to make requests with. """ return get_telemetry_client("deadline-cloud-library", version, config=config) + + +def record_success_fail_telemetry_event(**decorator_kwargs: Dict[str, Any]) -> Callable[..., F]: + """ + Decorator to try catch a function. Sends a success / fail telemetry event. + :param ** Python variable arguments. See https://docs.python.org/3/glossary.html#term-parameter. + """ + + def inner(function: F) -> F: + def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: + """ + Wrapper to try-catch a function for telemetry + :param * Python variable argument. See https://docs.python.org/3/glossary.html#term-parameter + :param ** Python variable argument. See https://docs.python.org/3/glossary.html#term-parameter + """ + success: bool = True + try: + return function(*args, **kwargs) + except Exception as e: + success = False + raise e + finally: + event_name = decorator_kwargs.get("metric_name", function.__name__) + get_deadline_cloud_library_telemetry_client().record_event( + event_type=f"com.amazon.rum.deadline.{event_name}", + event_details={"is_success": success}, + ) + + return cast(F, wrapper) + + return inner diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index fe779314..98ec16d6 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -11,7 +11,7 @@ from pathlib import Path import os import sys -from typing import Optional, Union +from typing import Callable, Optional, Union import datetime from typing import Any @@ -396,6 +396,19 @@ def _download_job_output( # in Job Attachments library can be increased (currently using default number, 10, which # makes it keep logging urllib3 warning messages when downloading large files) with _modified_logging_level(logging.getLogger("urllib3"), logging.ERROR): + + @api.record_success_fail_telemetry_event(metric_name="download_job_output") # type: ignore + def _download_job_output( + file_conflict_resolution: Optional[ + FileConflictResolution + ] = FileConflictResolution.CREATE_COPY, + on_downloading_files: Optional[Callable[[ProgressReportMetadata], bool]] = None, + ) -> DownloadSummaryStatistics: + return job_output_downloader.download_job_output( + file_conflict_resolution=file_conflict_resolution, + on_downloading_files=on_downloading_files, + ) + if not is_json_format: # Note: click doesn't export the return type of progressbar(), so we suppress mypy warnings for # not annotating the type of download_progress. @@ -408,11 +421,9 @@ def _update_download_progress(download_metadata: ProgressReportMetadata) -> bool download_progress.update(new_progress) return sigint_handler.continue_operation - download_summary: DownloadSummaryStatistics = ( - job_output_downloader.download_job_output( - file_conflict_resolution=file_conflict_resolution, - on_downloading_files=_update_download_progress, - ) + download_summary: DownloadSummaryStatistics = _download_job_output( # type: ignore + file_conflict_resolution=file_conflict_resolution, + on_downloading_files=_update_download_progress, ) else: @@ -423,7 +434,7 @@ def _update_download_progress(download_metadata: ProgressReportMetadata) -> bool # TODO: enable download cancellation for JSON format return True - download_summary = job_output_downloader.download_job_output( + download_summary = _download_job_output( # type: ignore file_conflict_resolution=file_conflict_resolution, on_downloading_files=_update_download_progress, ) diff --git a/src/deadline/client/ui/dialogs/submit_job_progress_dialog.py b/src/deadline/client/ui/dialogs/submit_job_progress_dialog.py index 6de80c2d..46d3660d 100644 --- a/src/deadline/client/ui/dialogs/submit_job_progress_dialog.py +++ b/src/deadline/client/ui/dialogs/submit_job_progress_dialog.py @@ -366,6 +366,7 @@ def _update_hash_progress(hashing_metadata: ProgressReportMetadata) -> bool: # Send the exception to the dialog self.hashing_thread_exception.emit(e) + @api.record_success_fail_telemetry_event(metric_name="gui_asset_upload") # type: ignore def _upload_background_thread(self, manifests: List[AssetRootManifest]) -> None: """ This function gets started in a background thread to start the upload diff --git a/test/unit/deadline_client/api/test_api_telemetry.py b/test/unit/deadline_client/api/test_api_telemetry.py index 788a7c0f..2a44be34 100644 --- a/test/unit/deadline_client/api/test_api_telemetry.py +++ b/test/unit/deadline_client/api/test_api_telemetry.py @@ -1,5 +1,6 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +from typing import Any, Dict import pytest import uuid import time @@ -9,7 +10,12 @@ from urllib import request from deadline.client import api, config -from deadline.client.api._telemetry import TelemetryClient, TelemetryEvent +from deadline.client.api._telemetry import ( + TelemetryClient, + TelemetryEvent, + get_deadline_cloud_library_telemetry_client, + record_success_fail_telemetry_event, +) from deadline.job_attachments.progress_tracker import SummaryStatistics @@ -288,3 +294,60 @@ def test_get_prefixed_endpoint( ): """Test that the _get_prefixed_endpoint function returns the expected prefixed endpoint""" assert mock_telemetry_client._get_prefixed_endpoint(endpoint, prefix) == expected_result + + +def test_record_decorator_success(): + """Tests that recording a decorator successful metric""" + with patch.object( + api._telemetry, "get_deadline_endpoint_url", side_effect=["https://fake-endpoint-url"] + ): + # GIVEN + queue_mock = MagicMock() + expected_summary: Dict[str, Any] = dict() + expected_summary["is_success"] = True + expected_summary["usage_mode"] = "CLI" + expected_event = TelemetryEvent( + event_type="com.amazon.rum.deadline.successful", + event_details=expected_summary, + ) + telemetry_client = get_deadline_cloud_library_telemetry_client() + telemetry_client.event_queue = queue_mock + + @record_success_fail_telemetry_event() + def successful(): + return + + # WHEN + successful() # type:ignore + + # THEN + queue_mock.put_nowait.assert_called_once_with(expected_event) + + +def test_record_decorator_fails(): + """Tests that recording a decorator failed metric""" + with patch.object( + api._telemetry, "get_deadline_endpoint_url", side_effect=["https://fake-endpoint-url"] + ): + # GIVEN + queue_mock = MagicMock() + expected_summary: Dict[str, Any] = dict() + expected_summary["is_success"] = False + expected_summary["usage_mode"] = "CLI" + expected_event = TelemetryEvent( + event_type="com.amazon.rum.deadline.fails", + event_details=expected_summary, + ) + telemetry_client = get_deadline_cloud_library_telemetry_client() + telemetry_client.event_queue = queue_mock + + @record_success_fail_telemetry_event() + def fails(): + raise RuntimeError("foobar") + + # WHEN + with pytest.raises(RuntimeError): + fails() # type:ignore + + # THEN + queue_mock.put_nowait.assert_called_once_with(expected_event) diff --git a/test/unit/deadline_client/api/test_job_bundle_submission.py b/test/unit/deadline_client/api/test_job_bundle_submission.py index 36d77120..26788dcc 100644 --- a/test/unit/deadline_client/api/test_job_bundle_submission.py +++ b/test/unit/deadline_client/api/test_job_bundle_submission.py @@ -487,7 +487,7 @@ def test_create_job_from_job_bundle_job_attachments( S3AssetManager, "upload_assets" ) as mock_upload_assets, patch.object( _submit_job_bundle.api, "get_deadline_cloud_library_telemetry_client" - ), patch.object( + ) as mock_telemetry, patch.object( api._telemetry, "get_deadline_endpoint_url", side_effect=["https://fake-endpoint-url"] ): client_mock().get_queue.side_effect = [MOCK_GET_QUEUE_RESPONSE] @@ -585,6 +585,7 @@ def fake_print_callback(msg: str) -> None: "fileSystem": JobAttachmentsFileSystem.COPIED, }, ) + assert mock_telemetry.call_count == 3 def test_create_job_from_job_bundle_empty_job_attachments(