Skip to content

Commit

Permalink
fix(agents-api): Remove screenshot after every action
Browse files: browse the repository at this point in the history
  • Loading branch information
Ahmad-mtos committed Nov 17, 2024
1 parent 805b5e3 commit a59934a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 28 deletions.
3 changes: 2 additions & 1 deletion agents-api/agents_api/routers/sessions/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ async def request_anthropic(
# Skip messages that are not assistant or user
if msg["role"] not in ["assistant", "user"]:
continue


# FIXME: return the tool call ids (save assistant message in entries as json dump)
# Transform the message content and tool calls
if msg["role"] == "assistant":
transformed_content = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,26 +104,26 @@ async def _reset_mouse(self) -> None:
window.$$julep$$_initialized = true;
""")

@staticmethod
def _with_error_and_screenshot(f):
@wraps(f)
async def wrapper(self: "PlaywrightActions", *args, **kwargs):
try:
result: RemoteBrowserOutput = await f(self, *args, **kwargs)
await self._wait_for_load()
# @staticmethod
# def _with_error_and_screenshot(f):
# @wraps(f)
# async def wrapper(self: "PlaywrightActions", *args, **kwargs):
# try:
# result: RemoteBrowserOutput = await f(self, *args, **kwargs)
# await self._wait_for_load()

screenshot: RemoteBrowserOutput = await self.take_screenshot()
# screenshot: RemoteBrowserOutput = await self.take_screenshot()

return RemoteBrowserOutput(
output=result.output,
base64_image=screenshot.base64_image,
system=result.system or f.__name__,
)
# return RemoteBrowserOutput(
# output=result.output,
# base64_image=screenshot.base64_image,
# system=result.system or f.__name__,
# )

except Exception as e:
return RemoteBrowserOutput(error=str(e))
# except Exception as e:
# return RemoteBrowserOutput(error=str(e))

return wrapper
# return wrapper

async def _get_screen_size(self) -> tuple[int, int]:
"""Get the current browser viewport size"""
Expand Down Expand Up @@ -198,7 +198,6 @@ def _overlay_cursor(self, screenshot_bytes: bytes, x: int, y: int) -> bytes:
# ---
# Actions

@_with_error_and_screenshot
async def navigate(self, url: str) -> RemoteBrowserOutput:
"""Navigate to a specific URL"""
await self.page.goto(url)
Expand All @@ -208,7 +207,6 @@ async def navigate(self, url: str) -> RemoteBrowserOutput:
output=url,
)

@_with_error_and_screenshot
async def refresh(self) -> RemoteBrowserOutput:
"""Refresh the current page"""
await self.page.reload()
Expand All @@ -218,15 +216,13 @@ async def refresh(self) -> RemoteBrowserOutput:
output="Refreshed page",
)

@_with_error_and_screenshot
async def cursor_position(self) -> RemoteBrowserOutput:
"""Get current mouse coordinates"""
# The position is obtained by awaiting the _get_mouse_coordinates helper
# (defined outside this excerpt) and unpacking its result into x and y.
x, y = await self._get_mouse_coordinates()
# Only the text `output` field is set here, formatted as "X=<x>, Y=<y>".
return RemoteBrowserOutput(
output=f"X={x}, Y={y}",
)

@_with_error_and_screenshot
async def press_key(self, key_combination: str) -> RemoteBrowserOutput:
"""Press a key or key combination"""
# Split combination into individual keys
Expand Down Expand Up @@ -257,7 +253,6 @@ async def press_key(self, key_combination: str) -> RemoteBrowserOutput:
output=f"Pressed {key_combination}",
)

@_with_error_and_screenshot
async def type_text(self, text: str) -> RemoteBrowserOutput:
"""Type a string of text"""
await self.page.keyboard.type(text)
Expand All @@ -266,7 +261,6 @@ async def type_text(self, text: str) -> RemoteBrowserOutput:
output=f"Typed {text}",
)

@_with_error_and_screenshot
async def mouse_move(self, coordinate: tuple[int, int]) -> RemoteBrowserOutput:
"""Move mouse to specified coordinates"""
await self.mouse.move(*coordinate)
Expand All @@ -275,7 +269,6 @@ async def mouse_move(self, coordinate: tuple[int, int]) -> RemoteBrowserOutput:
output=f"Moved mouse to {coordinate}",
)

@_with_error_and_screenshot
async def left_click(self) -> RemoteBrowserOutput:
"""Perform left mouse click"""
x, y = await self._get_mouse_coordinates()
Expand All @@ -285,7 +278,6 @@ async def left_click(self) -> RemoteBrowserOutput:
output="Left clicked",
)

@_with_error_and_screenshot
async def left_click_drag(self, coordinate: tuple[int, int]) -> RemoteBrowserOutput:
"""Click and drag to specified coordinates"""
await self.mouse.down()
Expand All @@ -296,7 +288,6 @@ async def left_click_drag(self, coordinate: tuple[int, int]) -> RemoteBrowserOut
output=f"Left clicked and dragged to {coordinate}",
)

@_with_error_and_screenshot
async def right_click(self) -> RemoteBrowserOutput:
"""Perform right mouse click"""
x, y = await self._get_mouse_coordinates()
Expand All @@ -306,7 +297,6 @@ async def right_click(self) -> RemoteBrowserOutput:
output="Right clicked",
)

@_with_error_and_screenshot
async def middle_click(self) -> RemoteBrowserOutput:
"""Perform middle mouse click"""
x, y = await self._get_mouse_coordinates()
Expand All @@ -316,7 +306,6 @@ async def middle_click(self) -> RemoteBrowserOutput:
output="Middle clicked",
)

@_with_error_and_screenshot
async def double_click(self) -> RemoteBrowserOutput:
"""Perform double click"""
x, y = await self._get_mouse_coordinates()
Expand Down

0 comments on commit a59934a

Please sign in to comment.