Skip to content

Commit

Permalink
feat: record Success-Fail telemetry event on job attachments upload. (#…
Browse files Browse the repository at this point in the history
…393)

Signed-off-by: David Leong <[email protected]>
  • Loading branch information
leongdl authored Jul 10, 2024
1 parent 54fd67c commit 9bc590b
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 12 deletions.
6 changes: 5 additions & 1 deletion src/deadline/client/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"get_telemetry_client",
"get_deadline_cloud_library_telemetry_client",
"get_storage_profile_for_queue",
"record_success_fail_telemetry_event",
]

# The following import is needed to prevent the following sporadic failure:
Expand Down Expand Up @@ -52,12 +53,15 @@
list_storage_profiles_for_queue,
)
from ._queue_parameters import get_queue_parameter_definitions
from ._submit_job_bundle import create_job_from_job_bundle, wait_for_create_job_to_complete

# Telemetry must be imported before Submit Job Bundle to avoid circular dependencies.
from ._telemetry import (
get_telemetry_client,
get_deadline_cloud_library_telemetry_client,
TelemetryClient,
record_success_fail_telemetry_event,
)
from ._submit_job_bundle import create_job_from_job_bundle, wait_for_create_job_to_complete
from ._get_storage_profile_for_queue import get_storage_profile_for_queue

logger = getLogger(__name__)
Expand Down
3 changes: 2 additions & 1 deletion src/deadline/client/api/_submit_job_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def create_job_from_job_bundle(
hashing_progress_callback=hashing_progress_callback,
)

attachment_settings = _upload_attachments(
attachment_settings = _upload_attachments( # type: ignore
asset_manager, asset_manifests, print_function_callback, upload_progress_callback
)
attachment_settings["fileSystem"] = JobAttachmentsFileSystem(
Expand Down Expand Up @@ -432,6 +432,7 @@ def _default_update_hash_progress(hashing_metadata: Dict[str, str]) -> bool:
return hashing_summary, manifests


@api.record_success_fail_telemetry_event(metric_name="cli_asset_upload") # type: ignore
def _upload_attachments(
asset_manager: S3AssetManager,
manifests: List[AssetRootManifest],
Expand Down
37 changes: 36 additions & 1 deletion src/deadline/client/api/_telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from datetime import datetime
from queue import Queue, Full
from threading import Thread
from typing import Any, Dict, Optional
from typing import Any, Callable, Dict, List, Optional, TypeVar, cast
from urllib import request, error

from ...job_attachments.progress_tracker import SummaryStatistics
Expand All @@ -32,6 +32,10 @@
logger = logging.getLogger(__name__)


# Generic function return type.
F = TypeVar("F", bound=Callable[..., Any])


def get_deadline_endpoint_url(
config: Optional[ConfigParser] = None,
) -> str:
Expand Down Expand Up @@ -349,3 +353,34 @@ def get_deadline_cloud_library_telemetry_client(
:return: Telemetry client to make requests with.
"""
return get_telemetry_client("deadline-cloud-library", version, config=config)


def record_success_fail_telemetry_event(**decorator_kwargs: Dict[str, Any]) -> Callable[..., F]:
"""
Decorator to try catch a function. Sends a success / fail telemetry event.
:param ** Python variable arguments. See https://docs.python.org/3/glossary.html#term-parameter.
"""

def inner(function: F) -> F:
def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any:
"""
Wrapper to try-catch a function for telemetry
:param * Python variable argument. See https://docs.python.org/3/glossary.html#term-parameter
:param ** Python variable argument. See https://docs.python.org/3/glossary.html#term-parameter
"""
success: bool = True
try:
return function(*args, **kwargs)
except Exception as e:
success = False
raise e
finally:
event_name = decorator_kwargs.get("metric_name", function.__name__)
get_deadline_cloud_library_telemetry_client().record_event(
event_type=f"com.amazon.rum.deadline.{event_name}",
event_details={"is_success": success},
)

return cast(F, wrapper)

return inner
25 changes: 18 additions & 7 deletions src/deadline/client/cli/_groups/job_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pathlib import Path
import os
import sys
from typing import Optional, Union
from typing import Callable, Optional, Union
import datetime
from typing import Any

Expand Down Expand Up @@ -396,6 +396,19 @@ def _download_job_output(
# in Job Attachments library can be increased (currently using default number, 10, which
# makes it keep logging urllib3 warning messages when downloading large files)
with _modified_logging_level(logging.getLogger("urllib3"), logging.ERROR):

@api.record_success_fail_telemetry_event(metric_name="download_job_output") # type: ignore
def _download_job_output(
file_conflict_resolution: Optional[
FileConflictResolution
] = FileConflictResolution.CREATE_COPY,
on_downloading_files: Optional[Callable[[ProgressReportMetadata], bool]] = None,
) -> DownloadSummaryStatistics:
return job_output_downloader.download_job_output(
file_conflict_resolution=file_conflict_resolution,
on_downloading_files=on_downloading_files,
)

if not is_json_format:
# Note: click doesn't export the return type of progressbar(), so we suppress mypy warnings for
# not annotating the type of download_progress.
Expand All @@ -408,11 +421,9 @@ def _update_download_progress(download_metadata: ProgressReportMetadata) -> bool
download_progress.update(new_progress)
return sigint_handler.continue_operation

download_summary: DownloadSummaryStatistics = (
job_output_downloader.download_job_output(
file_conflict_resolution=file_conflict_resolution,
on_downloading_files=_update_download_progress,
)
download_summary: DownloadSummaryStatistics = _download_job_output( # type: ignore
file_conflict_resolution=file_conflict_resolution,
on_downloading_files=_update_download_progress,
)
else:

Expand All @@ -423,7 +434,7 @@ def _update_download_progress(download_metadata: ProgressReportMetadata) -> bool
# TODO: enable download cancellation for JSON format
return True

download_summary = job_output_downloader.download_job_output(
download_summary = _download_job_output( # type: ignore
file_conflict_resolution=file_conflict_resolution,
on_downloading_files=_update_download_progress,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ def _update_hash_progress(hashing_metadata: ProgressReportMetadata) -> bool:
# Send the exception to the dialog
self.hashing_thread_exception.emit(e)

@api.record_success_fail_telemetry_event(metric_name="gui_asset_upload") # type: ignore
def _upload_background_thread(self, manifests: List[AssetRootManifest]) -> None:
"""
This function gets started in a background thread to start the upload
Expand Down
65 changes: 64 additions & 1 deletion test/unit/deadline_client/api/test_api_telemetry.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

from typing import Any, Dict
import pytest
import uuid
import time
Expand All @@ -9,7 +10,12 @@
from urllib import request

from deadline.client import api, config
from deadline.client.api._telemetry import TelemetryClient, TelemetryEvent
from deadline.client.api._telemetry import (
TelemetryClient,
TelemetryEvent,
get_deadline_cloud_library_telemetry_client,
record_success_fail_telemetry_event,
)
from deadline.job_attachments.progress_tracker import SummaryStatistics


Expand Down Expand Up @@ -288,3 +294,60 @@ def test_get_prefixed_endpoint(
):
"""Test that the _get_prefixed_endpoint function returns the expected prefixed endpoint"""
assert mock_telemetry_client._get_prefixed_endpoint(endpoint, prefix) == expected_result


def test_record_decorator_success():
"""Tests that recording a decorator successful metric"""
with patch.object(
api._telemetry, "get_deadline_endpoint_url", side_effect=["https://fake-endpoint-url"]
):
# GIVEN
queue_mock = MagicMock()
expected_summary: Dict[str, Any] = dict()
expected_summary["is_success"] = True
expected_summary["usage_mode"] = "CLI"
expected_event = TelemetryEvent(
event_type="com.amazon.rum.deadline.successful",
event_details=expected_summary,
)
telemetry_client = get_deadline_cloud_library_telemetry_client()
telemetry_client.event_queue = queue_mock

@record_success_fail_telemetry_event()
def successful():
return

# WHEN
successful() # type:ignore

# THEN
queue_mock.put_nowait.assert_called_once_with(expected_event)


def test_record_decorator_fails():
"""Tests that recording a decorator failed metric"""
with patch.object(
api._telemetry, "get_deadline_endpoint_url", side_effect=["https://fake-endpoint-url"]
):
# GIVEN
queue_mock = MagicMock()
expected_summary: Dict[str, Any] = dict()
expected_summary["is_success"] = False
expected_summary["usage_mode"] = "CLI"
expected_event = TelemetryEvent(
event_type="com.amazon.rum.deadline.fails",
event_details=expected_summary,
)
telemetry_client = get_deadline_cloud_library_telemetry_client()
telemetry_client.event_queue = queue_mock

@record_success_fail_telemetry_event()
def fails():
raise RuntimeError("foobar")

# WHEN
with pytest.raises(RuntimeError):
fails() # type:ignore

# THEN
queue_mock.put_nowait.assert_called_once_with(expected_event)
3 changes: 2 additions & 1 deletion test/unit/deadline_client/api/test_job_bundle_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def test_create_job_from_job_bundle_job_attachments(
S3AssetManager, "upload_assets"
) as mock_upload_assets, patch.object(
_submit_job_bundle.api, "get_deadline_cloud_library_telemetry_client"
), patch.object(
) as mock_telemetry, patch.object(
api._telemetry, "get_deadline_endpoint_url", side_effect=["https://fake-endpoint-url"]
):
client_mock().get_queue.side_effect = [MOCK_GET_QUEUE_RESPONSE]
Expand Down Expand Up @@ -585,6 +585,7 @@ def fake_print_callback(msg: str) -> None:
"fileSystem": JobAttachmentsFileSystem.COPIED,
},
)
assert mock_telemetry.call_count == 3


def test_create_job_from_job_bundle_empty_job_attachments(
Expand Down

0 comments on commit 9bc590b

Please sign in to comment.