-
Notifications
You must be signed in to change notification settings - Fork 14.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: ScreenshotCachePayload serialization #32156
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,7 @@ | |
from datetime import datetime | ||
from enum import Enum | ||
from io import BytesIO | ||
from typing import TYPE_CHECKING | ||
from typing import cast, TYPE_CHECKING, TypedDict | ||
|
||
from flask import current_app | ||
|
||
|
@@ -63,13 +63,37 @@ class StatusValues(Enum): | |
ERROR = "Error" | ||
|
||
|
||
class ScreenshotCachePayloadType(TypedDict): | ||
image: bytes | None | ||
timestamp: str | ||
status: str | ||
|
||
|
||
class ScreenshotCachePayload: | ||
def __init__(self, image: bytes | None = None): | ||
def __init__( | ||
self, | ||
image: bytes | None = None, | ||
status: StatusValues = StatusValues.PENDING, | ||
timestamp: str = "", | ||
): | ||
self._image = image | ||
self._timestamp = datetime.now().isoformat() | ||
self.status = StatusValues.PENDING | ||
if image: | ||
self.status = StatusValues.UPDATED | ||
self._timestamp = timestamp or datetime.now().isoformat() | ||
self.status = StatusValues.UPDATED if image else status | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Status Override on Image Presence
Tell me more
What is the issue?
The initial status logic in `ScreenshotCachePayload.__init__` overrides the provided `status` parameter when an image is present, which may not be the desired behavior in all cases.
Why this matters
This could lead to unexpected state transitions when initializing a payload with both an image and a specific status, as the image presence will always force an UPDATED status.
Suggested change ∙ Feature Preview
Consider whether to respect the provided status parameter regardless of image presence: `self.status = status`
💬 Chat with Korbit by mentioning @korbit-ai. |
||
|
||
@classmethod | ||
def from_dict(cls, payload: ScreenshotCachePayloadType) -> ScreenshotCachePayload: | ||
return cls( | ||
image=payload["image"], | ||
status=StatusValues(payload["status"]), | ||
timestamp=payload["timestamp"], | ||
) | ||
|
||
def to_dict(self) -> ScreenshotCachePayloadType: | ||
return { | ||
"image": self._image, | ||
"timestamp": self._timestamp, | ||
"status": self.status.value, | ||
} | ||
|
||
def update_timestamp(self) -> None: | ||
self._timestamp = datetime.now().isoformat() | ||
|
@@ -177,9 +201,16 @@ def get_from_cache( | |
def get_from_cache_key(cls, cache_key: str) -> ScreenshotCachePayload | None: | ||
logger.info("Attempting to get from cache: %s", cache_key) | ||
if payload := cls.cache.get(cache_key): | ||
# for backwards compatability, byte objects should be converted | ||
if not isinstance(payload, ScreenshotCachePayload): | ||
# Initially, only bytes were stored. This was changed to store an instance | ||
# of ScreenshotCachePayload, but since it can't be serialized in all | ||
# backends it was further changed to a dict of attributes. | ||
if isinstance(payload, bytes): | ||
payload = ScreenshotCachePayload(payload) | ||
elif isinstance(payload, ScreenshotCachePayload): | ||
pass | ||
elif isinstance(payload, dict): | ||
payload = cast(ScreenshotCachePayloadType, payload) | ||
payload = ScreenshotCachePayload.from_dict(payload) | ||
return payload | ||
logger.info("Failed at getting from cache: %s", cache_key) | ||
return None | ||
|
@@ -217,7 +248,7 @@ def compute_and_cache( # pylint: disable=too-many-arguments | |
thumb_size = thumb_size or self.thumb_size | ||
logger.info("Processing url for thumbnail: %s", cache_key) | ||
cache_payload.computing() | ||
self.cache.set(cache_key, cache_payload) | ||
self.cache.set(cache_key, cache_payload.to_dict()) | ||
image = None | ||
# Assuming all sorts of things can go wrong with Selenium | ||
try: | ||
|
@@ -239,7 +270,7 @@ def compute_and_cache( # pylint: disable=too-many-arguments | |
logger.info("Caching thumbnail: %s", cache_key) | ||
with event_logger.log_context(f"screenshot.cache.{self.thumbnail_type}"): | ||
cache_payload.update(image) | ||
self.cache.set(cache_key, cache_payload) | ||
self.cache.set(cache_key, cache_payload.to_dict()) | ||
logger.info("Updated thumbnail cache; Status: %s", cache_payload.get_status()) | ||
return | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unnecessary Object Creation Before Serialization ![category Performance](https://camo.githubusercontent.com/c214095a9a15cdec09fcba5f2705c7a01bded9162b954ab44b2a0f892734daaf/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f506572666f726d616e63652d346634366535)
Tell me more
What is the issue?
Creating a ScreenshotCachePayload instance just to immediately convert it to a dictionary introduces unnecessary object creation and serialization overhead.
Why this matters
When this operation is performed frequently, the creation and immediate destruction of temporary objects can impact memory usage and create additional garbage collection pressure.
Suggested change ∙ Feature Preview
Create the dictionary directly instead of creating and converting a temporary object:
💬 Chat with Korbit by mentioning @korbit-ai.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.