From a59934ac972546739e0add3db1cc06dc02bef370 Mon Sep 17 00:00:00 2001 From: Ahmad-mtos Date: Sun, 17 Nov 2024 21:01:55 +0300 Subject: [PATCH] fix(agents-api): Remove screenshot after every action --- .../agents_api/routers/sessions/chat.py | 3 +- .../utils/integrations/remote_browser.py | 43 +++++++------------ 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/agents-api/agents_api/routers/sessions/chat.py b/agents-api/agents_api/routers/sessions/chat.py index 5716165ba..958094d14 100644 --- a/agents-api/agents_api/routers/sessions/chat.py +++ b/agents-api/agents_api/routers/sessions/chat.py @@ -55,7 +55,8 @@ async def request_anthropic( # Skip messages that are not assistant or user if msg["role"] not in ["assistant", "user"]: continue - + + # FIXME: return the tool call ids (save assistant message in entries as json dump) # Transform the message content and tool calls if msg["role"] == "assistant": transformed_content = [ diff --git a/integrations-service/integrations/utils/integrations/remote_browser.py b/integrations-service/integrations/utils/integrations/remote_browser.py index 6e5d63375..21ecc3a59 100644 --- a/integrations-service/integrations/utils/integrations/remote_browser.py +++ b/integrations-service/integrations/utils/integrations/remote_browser.py @@ -104,26 +104,26 @@ async def _reset_mouse(self) -> None: window.$$julep$$_initialized = true; """) - @staticmethod - def _with_error_and_screenshot(f): - @wraps(f) - async def wrapper(self: "PlaywrightActions", *args, **kwargs): - try: - result: RemoteBrowserOutput = await f(self, *args, **kwargs) - await self._wait_for_load() + # @staticmethod + # def _with_error_and_screenshot(f): + # @wraps(f) + # async def wrapper(self: "PlaywrightActions", *args, **kwargs): + # try: + # result: RemoteBrowserOutput = await f(self, *args, **kwargs) + # await self._wait_for_load() - screenshot: RemoteBrowserOutput = await self.take_screenshot() + # screenshot: RemoteBrowserOutput = await self.take_screenshot() - return RemoteBrowserOutput( - output=result.output, - base64_image=screenshot.base64_image, - system=result.system or f.__name__, - ) + # return RemoteBrowserOutput( + # output=result.output, + # base64_image=screenshot.base64_image, + # system=result.system or f.__name__, + # ) - except Exception as e: - return RemoteBrowserOutput(error=str(e)) + # except Exception as e: + # return RemoteBrowserOutput(error=str(e)) - return wrapper + # return wrapper async def _get_screen_size(self) -> tuple[int, int]: """Get the current browser viewport size""" @@ -198,7 +198,6 @@ def _overlay_cursor(self, screenshot_bytes: bytes, x: int, y: int) -> bytes: # --- # Actions - @_with_error_and_screenshot async def navigate(self, url: str) -> RemoteBrowserOutput: """Navigate to a specific URL""" await self.page.goto(url) @@ -208,7 +207,6 @@ async def navigate(self, url: str) -> RemoteBrowserOutput: output=url, ) - @_with_error_and_screenshot async def refresh(self) -> RemoteBrowserOutput: """Refresh the current page""" await self.page.reload() @@ -218,7 +216,6 @@ async def refresh(self) -> RemoteBrowserOutput: output="Refreshed page", ) - @_with_error_and_screenshot async def cursor_position(self) -> RemoteBrowserOutput: """Get current mouse coordinates""" x, y = await self._get_mouse_coordinates() @@ -226,7 +223,6 @@ async def cursor_position(self) -> RemoteBrowserOutput: output=f"X={x}, Y={y}", ) - @_with_error_and_screenshot async def press_key(self, key_combination: str) -> RemoteBrowserOutput: """Press a key or key combination""" # Split combination into individual keys @@ -257,7 +253,6 @@ async def press_key(self, key_combination: str) -> RemoteBrowserOutput: output=f"Pressed {key_combination}", ) - @_with_error_and_screenshot async def type_text(self, text: str) -> RemoteBrowserOutput: """Type a string of text""" await self.page.keyboard.type(text) @@ -266,7 +261,6 @@ async def type_text(self, text: str) -> RemoteBrowserOutput: output=f"Typed {text}", ) - @_with_error_and_screenshot async def mouse_move(self, coordinate: tuple[int, int]) -> RemoteBrowserOutput: """Move mouse to specified coordinates""" await self.mouse.move(*coordinate) @@ -275,7 +269,6 @@ async def mouse_move(self, coordinate: tuple[int, int]) -> RemoteBrowserOutput: output=f"Moved mouse to {coordinate}", ) - @_with_error_and_screenshot async def left_click(self) -> RemoteBrowserOutput: """Perform left mouse click""" x, y = await self._get_mouse_coordinates() @@ -285,7 +278,6 @@ async def left_click(self) -> RemoteBrowserOutput: output="Left clicked", ) - @_with_error_and_screenshot async def left_click_drag(self, coordinate: tuple[int, int]) -> RemoteBrowserOutput: """Click and drag to specified coordinates""" await self.mouse.down() @@ -296,7 +288,6 @@ async def left_click_drag(self, coordinate: tuple[int, int]) -> RemoteBrowserOut output=f"Left clicked and dragged to {coordinate}", ) - @_with_error_and_screenshot async def right_click(self) -> RemoteBrowserOutput: """Perform right mouse click""" x, y = await self._get_mouse_coordinates() @@ -306,7 +297,6 @@ async def right_click(self) -> RemoteBrowserOutput: output="Right clicked", ) - @_with_error_and_screenshot async def middle_click(self) -> RemoteBrowserOutput: """Perform middle mouse click""" x, y = await self._get_mouse_coordinates() @@ -316,7 +306,6 @@ async def middle_click(self) -> RemoteBrowserOutput: output="Middle clicked", ) - @_with_error_and_screenshot async def double_click(self) -> RemoteBrowserOutput: """Perform double click""" x, y = await self._get_mouse_coordinates()