Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: ScreenshotCachePayload serialization #32156

Merged
merged 2 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions superset/charts/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,7 @@ def build_response(status_code: int) -> WerkzeugResponse:

if cache_payload.should_trigger_task(force):
logger.info("Triggering screenshot ASYNC")
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload())
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload().to_dict())
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary Object Creation Before Serialization category Performance

Tell me more
What is the issue?

Creating a ScreenshotCachePayload instance just to immediately convert it to a dictionary introduces unnecessary object creation and serialization overhead.

Why this matters

When this operation is performed frequently, the creation and immediate destruction of temporary objects can impact memory usage and create additional garbage collection pressure.

Suggested change ∙ Feature Preview

Create the dictionary directly instead of creating and converting a temporary object:

screenshot_obj.cache.set(cache_key, {"status": "pending", "timestamp": datetime.now().timestamp()})

Report a problem with this comment

💬 Chat with Korbit by mentioning @korbit-ai.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Readability counts. -- Zen of Python

cache_chart_thumbnail.delay(
current_user=get_current_user(),
chart_id=chart.id,
Expand Down Expand Up @@ -755,7 +755,7 @@ def thumbnail(self, pk: int, digest: str, **kwargs: Any) -> WerkzeugResponse:
logger.info(
"Triggering thumbnail compute (chart id: %s) ASYNC", str(chart.id)
)
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload())
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload().to_dict())
cache_chart_thumbnail.delay(
current_user=current_user,
chart_id=chart.id,
Expand Down
4 changes: 2 additions & 2 deletions superset/dashboards/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,7 +1115,7 @@ def build_response(status_code: int) -> WerkzeugResponse:

if cache_payload.should_trigger_task(force):
logger.info("Triggering screenshot ASYNC")
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload())
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload().to_dict())
cache_dashboard_screenshot.delay(
username=get_current_user(),
guest_token=(
Expand Down Expand Up @@ -1296,7 +1296,7 @@ def thumbnail(self, pk: int, digest: str, **kwargs: Any) -> WerkzeugResponse:
"Triggering thumbnail compute (dashboard id: %s) ASYNC",
str(dashboard.id),
)
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload())
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload().to_dict())
cache_dashboard_thumbnail.delay(
current_user=current_user,
dashboard_id=dashboard.id,
Expand Down
51 changes: 41 additions & 10 deletions superset/utils/screenshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from datetime import datetime
from enum import Enum
from io import BytesIO
from typing import TYPE_CHECKING
from typing import cast, TYPE_CHECKING, TypedDict

from flask import current_app

Expand Down Expand Up @@ -63,13 +63,37 @@ class StatusValues(Enum):
ERROR = "Error"


class ScreenshotCachePayloadType(TypedDict):
image: bytes | None
timestamp: str
status: str


class ScreenshotCachePayload:
def __init__(self, image: bytes | None = None):
def __init__(
self,
image: bytes | None = None,
status: StatusValues = StatusValues.PENDING,
timestamp: str = "",
):
self._image = image
self._timestamp = datetime.now().isoformat()
self.status = StatusValues.PENDING
if image:
self.status = StatusValues.UPDATED
self._timestamp = timestamp or datetime.now().isoformat()
self.status = StatusValues.UPDATED if image else status
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Status Override on Image Presence category Functionality

Tell me more
What is the issue?

The initial status logic in ScreenshotCachePayload.init overrides the provided status parameter when an image is present, which may not be the desired behavior in all cases.

Why this matters

This could lead to unexpected state transitions when initializing a payload with both an image and a specific status, as the image presence will always force an UPDATED status.

Suggested change ∙ Feature Preview

Consider whether to respect the provided status parameter regardless of image presence:

self.status = status

Report a problem with this comment

💬 Chat with Korbit by mentioning @korbit-ai.


@classmethod
def from_dict(cls, payload: ScreenshotCachePayloadType) -> ScreenshotCachePayload:
return cls(
image=payload["image"],
status=StatusValues(payload["status"]),
timestamp=payload["timestamp"],
)

def to_dict(self) -> ScreenshotCachePayloadType:
return {
"image": self._image,
"timestamp": self._timestamp,
"status": self.status.value,
}

def update_timestamp(self) -> None:
self._timestamp = datetime.now().isoformat()
Expand Down Expand Up @@ -177,9 +201,16 @@ def get_from_cache(
def get_from_cache_key(cls, cache_key: str) -> ScreenshotCachePayload | None:
logger.info("Attempting to get from cache: %s", cache_key)
if payload := cls.cache.get(cache_key):
# for backwards compatability, byte objects should be converted
if not isinstance(payload, ScreenshotCachePayload):
# Initially, only bytes were stored. This was changed to store an instance
# of ScreenshotCachePayload, but since it can't be serialized in all
# backends it was further changed to a dict of attributes.
if isinstance(payload, bytes):
payload = ScreenshotCachePayload(payload)
elif isinstance(payload, ScreenshotCachePayload):
pass
elif isinstance(payload, dict):
payload = cast(ScreenshotCachePayloadType, payload)
payload = ScreenshotCachePayload.from_dict(payload)
return payload
logger.info("Failed at getting from cache: %s", cache_key)
return None
Expand Down Expand Up @@ -217,7 +248,7 @@ def compute_and_cache( # pylint: disable=too-many-arguments
thumb_size = thumb_size or self.thumb_size
logger.info("Processing url for thumbnail: %s", cache_key)
cache_payload.computing()
self.cache.set(cache_key, cache_payload)
self.cache.set(cache_key, cache_payload.to_dict())
image = None
# Assuming all sorts of things can go wrong with Selenium
try:
Expand All @@ -239,7 +270,7 @@ def compute_and_cache( # pylint: disable=too-many-arguments
logger.info("Caching thumbnail: %s", cache_key)
with event_logger.log_context(f"screenshot.cache.{self.thumbnail_type}"):
cache_payload.update(image)
self.cache.set(cache_key, cache_payload)
self.cache.set(cache_key, cache_payload.to_dict())
logger.info("Updated thumbnail cache; Status: %s", cache_payload.get_status())
return

Expand Down
22 changes: 11 additions & 11 deletions tests/unit_tests/utils/screenshot_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from superset.utils.screenshots import (
BaseScreenshot,
ScreenshotCachePayload,
StatusValues,
ScreenshotCachePayloadType,
)

BASE_SCREENSHOT_PATH = "superset.utils.screenshots.BaseScreenshot"
Expand Down Expand Up @@ -121,24 +121,24 @@ def _setup_compute_and_cache(self, mocker: MockerFixture, screenshot_obj):
def test_happy_path(self, mocker: MockerFixture, screenshot_obj):
self._setup_compute_and_cache(mocker, screenshot_obj)
screenshot_obj.compute_and_cache(force=False)
cache_payload: ScreenshotCachePayload = screenshot_obj.cache.get("key")
assert cache_payload.status == StatusValues.UPDATED
cache_payload: ScreenshotCachePayloadType = screenshot_obj.cache.get("key")
assert cache_payload["status"] == "Updated"

def test_screenshot_error(self, mocker: MockerFixture, screenshot_obj):
mocks = self._setup_compute_and_cache(mocker, screenshot_obj)
get_screenshot: MagicMock = mocks.get("get_screenshot")
get_screenshot.side_effect = Exception
screenshot_obj.compute_and_cache(force=False)
cache_payload: ScreenshotCachePayload = screenshot_obj.cache.get("key")
assert cache_payload.status == StatusValues.ERROR
cache_payload: ScreenshotCachePayloadType = screenshot_obj.cache.get("key")
assert cache_payload["status"] == "Error"

def test_resize_error(self, mocker: MockerFixture, screenshot_obj):
mocks = self._setup_compute_and_cache(mocker, screenshot_obj)
resize_image: MagicMock = mocks.get("resize_image")
resize_image.side_effect = Exception
screenshot_obj.compute_and_cache(force=False)
cache_payload: ScreenshotCachePayload = screenshot_obj.cache.get("key")
assert cache_payload.status == StatusValues.ERROR
cache_payload: ScreenshotCachePayloadType = screenshot_obj.cache.get("key")
assert cache_payload["status"] == "Error"

def test_skips_if_computing(self, mocker: MockerFixture, screenshot_obj):
mocks = self._setup_compute_and_cache(mocker, screenshot_obj)
Expand All @@ -155,8 +155,8 @@ def test_skips_if_computing(self, mocker: MockerFixture, screenshot_obj):
# Ensure that it processes when force = True
screenshot_obj.compute_and_cache(force=True)
get_screenshot.assert_called_once()
cache_payload: ScreenshotCachePayload = screenshot_obj.cache.get("key")
assert cache_payload.status == StatusValues.UPDATED
cache_payload: ScreenshotCachePayloadType = screenshot_obj.cache.get("key")
assert cache_payload["status"] == "Updated"

def test_skips_if_updated(self, mocker: MockerFixture, screenshot_obj):
mocks = self._setup_compute_and_cache(mocker, screenshot_obj)
Expand All @@ -177,8 +177,8 @@ def test_skips_if_updated(self, mocker: MockerFixture, screenshot_obj):
force=True, window_size=window_size, thumb_size=thumb_size
)
get_screenshot.assert_called_once()
cache_payload: ScreenshotCachePayload = screenshot_obj.cache.get("key")
assert cache_payload._image != b"initial_value"
cache_payload: ScreenshotCachePayloadType = screenshot_obj.cache.get("key")
assert cache_payload["image"] != b"initial_value"

def test_resize(self, mocker: MockerFixture, screenshot_obj):
mocks = self._setup_compute_and_cache(mocker, screenshot_obj)
Expand Down
Loading