Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into fix-websurfer-tools-f…
Browse files Browse the repository at this point in the history
…or-ollama
  • Loading branch information
davorrunje committed Feb 20, 2025
2 parents a552716 + 52901b2 commit 2079f30
Show file tree
Hide file tree
Showing 58 changed files with 1,638 additions and 162 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,5 @@ local_cache
notebook/result.png

notebook/coding

chroma
1 change: 1 addition & 0 deletions .muffet-excluded-links.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ example.com
rapidapi.com
openai.com
platform.openai.com
platform.deepseek.com
code.visualstudio.com
bloomberg.com
investor.nvidia.com
Expand Down
4 changes: 3 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ repos:
notebook/agentchat_graph_rag_neo4j.ipynb |
notebook/agentchat_swarm_graphrag_telemetry_trip_planner.ipynb |
website/node_modules/.* |
website/notebooks/.*
website/notebooks/.* |
test/agents/experimental/document_agent/pdf_parsed/Toast_financial_report.md |
test/agents/experimental/document_agent/pdf_parsed/nvidia_10k_2024.md
)$
# See https://jaredkhan.com/blog/mypy-pre-commit
- repo: local
Expand Down
129 changes: 129 additions & 0 deletions RFCs/000/RFC-000-global-run-function.ipynb.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import Any, Protocol, runtime_checkable\n",
"\n",
"from autogen import Agent\n",
"\n",
"\n",
"class RunResponse:\n",
" pass\n",
"\n",
"\n",
"class Cost:\n",
" def __init__(self, **kwargs: Any):\n",
" self._cost: dict[str, Any] = kwargs.copy()\n",
"\n",
" @staticmethod\n",
" def _add_elements(key: str, x: dict[str, Any], y: dict[str, Any]) -> Any:\n",
" if key in x and key in y:\n",
" return x[key] + y[key]\n",
" elif key in x:\n",
" return x[key]\n",
" elif key in y:\n",
" return y[key]\n",
" else:\n",
" raise KeyError(f\"Key {key} not found in either dictionary\")\n",
"\n",
" def __add__(self, other: \"Cost\") -> \"Cost\":\n",
" keys = set(self._cost.keys()) | set(other._cost.keys())\n",
" return Cost(**{key: self._add_elements(key, self._cost, other._cost) for key in keys})\n",
"\n",
"\n",
"@runtime_checkable\n",
"class EventProtocol(Protocol):\n",
" @property\n",
" def cost(self) -> Cost:\n",
" return Cost()\n",
"\n",
"\n",
"class RunResponse:\n",
" @property\n",
" def events(self) -> list[EventProtocol]:\n",
" pass\n",
"\n",
"\n",
"def run(\n",
" *agents: Agent,\n",
") -> RunResponse:\n",
    "    return RunResponse()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"agents: list[Agent] = []\n",
"\n",
"response = run(*agents, message=\"What is the meaning of life?\")\n",
"\n",
    "total_cost = Cost()\n",
"for m in response.events:\n",
" total_cost += m.cost\n",
" if isinstance(m, InputRequest):\n",
" s = input(m.prompt)\n",
" m.respond(s)\n",
" elif isinstance(m, OutputMessage):\n",
" print(m.message)\n",
" elif isinstance(m, ToolRequest):\n",
" tool = m.tool"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv-3.10-core",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 1 addition & 1 deletion autogen/agentchat/conversable_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -1743,7 +1743,7 @@ def get_chat_results(self, chat_index: Optional[int] = None) -> Union[list[ChatR
else:
return self._finished_chats

def reset(self):
def reset(self) -> None:
"""Reset the agent."""
self.clear_history()
self.reset_consecutive_auto_reply_counter()
Expand Down
2 changes: 2 additions & 0 deletions autogen/agentchat/realtime/experimental/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
# SPDX-License-Identifier: Apache-2.0

from .audio_adapters import TwilioAudioAdapter, WebSocketAudioAdapter
from .audio_observer import AudioObserver
from .function_observer import FunctionObserver
from .realtime_agent import RealtimeAgent
from .realtime_observer import RealtimeObserver
from .realtime_swarm import register_swarm

__all__ = [
"AudioObserver",
"FunctionObserver",
"RealtimeAgent",
"RealtimeObserver",
Expand Down
42 changes: 42 additions & 0 deletions autogen/agentchat/realtime/experimental/audio_observer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
#
# SPDX-License-Identifier: Apache-2.0

from typing import TYPE_CHECKING, Optional

from ....doc_utils import export_module
from .realtime_events import InputAudioBufferDelta, RealtimeEvent
from .realtime_observer import RealtimeObserver

if TYPE_CHECKING:
from logging import Logger


@export_module("autogen.agentchat.realtime.experimental")
class AudioObserver(RealtimeObserver):
    """Realtime observer that logs incoming user audio buffer deltas."""

    def __init__(self, *, logger: Optional["Logger"] = None) -> None:
        """Create the audio observer.

        Args:
            logger: Optional logger, forwarded to the base observer.
        """
        super().__init__(logger=logger)

    async def on_event(self, event: RealtimeEvent) -> None:
        """Log audio-buffer delta events coming from the Realtime API.

        Args:
            event: The realtime event to inspect; all non-audio events are ignored.
        """
        if not isinstance(event, InputAudioBufferDelta):
            return
        self.logger.info("Received audio buffer delta")

    async def initialize_session(self) -> None:
        """This observer needs no session initialization."""

    async def run_loop(self) -> None:
        """This observer has no background loop to run."""


if TYPE_CHECKING:
function_observer: RealtimeObserver = AudioObserver()
13 changes: 10 additions & 3 deletions autogen/agentchat/realtime/experimental/clients/gemini/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from ......doc_utils import export_module
from ...realtime_events import AudioDelta, FunctionCall, RealtimeEvent, SessionCreated
from ..realtime_client import Role, register_realtime_client
from ..realtime_client import RealtimeClientBase, Role, register_realtime_client

if TYPE_CHECKING:
from websockets.asyncio.client import ClientConnection
Expand All @@ -30,7 +30,7 @@

@register_realtime_client()
@export_module("autogen.agentchat.realtime.experimental.clients")
class GeminiRealtimeClient:
class GeminiRealtimeClient(RealtimeClientBase):
"""(Experimental) Client for Gemini Realtime API."""

def __init__(
Expand All @@ -44,6 +44,7 @@ def __init__(
Args:
llm_config (dict[str, Any]): The config for the client.
"""
super().__init__()
self._llm_config = llm_config
self._logger = logger

Expand Down Expand Up @@ -123,6 +124,7 @@ async def send_audio(self, audio: str) -> None:
]
}
}
await self.queue_input_audio_buffer_delta(audio)
if self._is_reading_events:
await self.connection.send(json.dumps(msg))

Expand Down Expand Up @@ -185,13 +187,18 @@ async def connect(self) -> AsyncGenerator[None, None]:
self._connection = None

async def read_events(self) -> AsyncGenerator[RealtimeEvent, None]:
"""Read Events from the Gemini Realtime API."""
"""Read Events from the Gemini Realtime Client"""
if self._connection is None:
raise RuntimeError("Client is not connected, call connect() first.")
await self._initialize_session()

self._is_reading_events = True

async for event in self._read_events():
yield event

async def _read_from_connection(self) -> AsyncGenerator[RealtimeEvent, None]:
"""Read messages from the Gemini Realtime connection."""
async for raw_message in self.connection:
message = raw_message.decode("ascii") if isinstance(raw_message, bytes) else raw_message
events = self._parse_message(json.loads(message))
Expand Down
17 changes: 12 additions & 5 deletions autogen/agentchat/realtime/experimental/clients/oai/base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from ......doc_utils import export_module
from ...realtime_events import RealtimeEvent
from ..realtime_client import Role, register_realtime_client
from ..realtime_client import RealtimeClientBase, Role, register_realtime_client
from .utils import parse_oai_message

if TYPE_CHECKING:
Expand All @@ -25,7 +25,7 @@

@register_realtime_client()
@export_module("autogen.agentchat.realtime.experimental.clients")
class OpenAIRealtimeClient:
class OpenAIRealtimeClient(RealtimeClientBase):
"""(Experimental) Client for OpenAI Realtime API."""

def __init__(
Expand All @@ -39,6 +39,7 @@ def __init__(
Args:
llm_config (dict[str, Any]): The config for the client.
"""
super().__init__()
self._llm_config = llm_config
self._logger = logger

Expand Down Expand Up @@ -110,6 +111,7 @@ async def send_audio(self, audio: str) -> None:
Args:
audio (str): The audio to send.
"""
await self.queue_input_audio_buffer_delta(audio)
await self.connection.input_audio_buffer.append(audio=audio)

async def truncate_audio(self, audio_end_ms: int, content_index: int, item_id: str) -> None:
Expand Down Expand Up @@ -163,13 +165,18 @@ async def read_events(self) -> AsyncGenerator[RealtimeEvent, None]:
raise RuntimeError("Client is not connected, call connect() first.")

try:
async for message in self._connection:
for event in self._parse_message(message.model_dump()):
yield event
async for event in self._read_events():
yield event

finally:
self._connection = None

async def _read_from_connection(self) -> AsyncGenerator[RealtimeEvent, None]:
"""Read messages from the OpenAI Realtime API."""
async for message in self._connection:
for event in self._parse_message(message.model_dump()):
yield event

def _parse_message(self, message: dict[str, Any]) -> list[RealtimeEvent]:
"""Parse a message from the OpenAI Realtime API.
Expand Down
15 changes: 11 additions & 4 deletions autogen/agentchat/realtime/experimental/clients/oai/rtc_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from ......doc_utils import export_module
from ...realtime_events import RealtimeEvent
from ..realtime_client import Role, register_realtime_client
from ..realtime_client import RealtimeClientBase, Role, register_realtime_client
from .utils import parse_oai_message

if TYPE_CHECKING:
Expand All @@ -26,7 +26,7 @@

@register_realtime_client()
@export_module("autogen.agentchat.realtime.experimental.clients.oai")
class OpenAIRealtimeWebRTCClient:
class OpenAIRealtimeWebRTCClient(RealtimeClientBase):
"""(Experimental) Client for OpenAI Realtime API that uses WebRTC protocol."""

def __init__(
Expand All @@ -41,6 +41,7 @@ def __init__(
Args:
llm_config (dict[str, Any]): The config for the client.
"""
super().__init__()
self._llm_config = llm_config
self._logger = logger
self._websocket = websocket
Expand Down Expand Up @@ -94,11 +95,12 @@ async def send_text(self, *, role: Role, text: str) -> None:

async def send_audio(self, audio: str) -> None:
"""Send audio to the OpenAI Realtime API.
in case of WebRTC, audio is already sent by js client, so we just queue it in order to be logged.
Args:
audio (str): The audio to send.
"""
await self._websocket.send_json({"type": "input_audio_buffer.append", "audio": audio})
await self.queue_input_audio_buffer_delta(audio)

async def truncate_audio(self, audio_end_ms: int, content_index: int, item_id: str) -> None:
"""Truncate audio in the OpenAI Realtime API.
Expand Down Expand Up @@ -176,7 +178,12 @@ async def connect(self) -> AsyncGenerator[None, None]:
pass

async def read_events(self) -> AsyncGenerator[RealtimeEvent, None]:
"""Read messages from the OpenAI Realtime API.
"""Read events from the OpenAI Realtime API."""
async for event in self._read_events():
yield event

async def _read_from_connection(self) -> AsyncGenerator[RealtimeEvent, None]:
"""Read messages from the OpenAI Realtime API connection.
Again, in case of WebRTC, we do not read OpenAI messages directly since we
do not hold connection to OpenAI. Instead we read messages from the websocket, and javascript
client on the other side of the websocket that is connected to OpenAI is relaying events to us.
Expand Down
Loading

0 comments on commit 2079f30

Please sign in to comment.