Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0509 audio starts before session #564

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions autogen/agentchat/realtime_agent/clients/oai_realtime_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
#
# SPDX-License-Identifier: Apache-2.0

import asyncio
import json
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from logging import Logger, getLogger
from typing import TYPE_CHECKING, Any, Callable, Optional
from typing import TYPE_CHECKING, Any, Callable, List, Optional

import httpx
from openai import DEFAULT_MAX_RETRIES, NOT_GIVEN, AsyncOpenAI
Expand Down Expand Up @@ -291,11 +290,17 @@ async def send_text(self, *, role: Role, text: str) -> None:
# await self.connection.response.cancel() #why is this here?
await self._websocket.send_json(
{
"type": "connection.conversation.item.create",
"type": "response.cancel",
}
)
await self._websocket.send_json(
{
"type": "conversation.item.create",
"item": {"type": "message", "role": role, "content": [{"type": "input_text", "text": text}]},
}
)
# await self.connection.response.create()
await self._websocket.send_json({"type": "response.create"})

async def send_audio(self, audio: str) -> None:
"""Send audio to the OpenAI Realtime API.
Expand Down Expand Up @@ -338,15 +343,17 @@ async def session_update(self, session_options: dict[str, Any]) -> None:
await self._websocket.send_json({"type": "session.update", "session": session_options})
logger.info("Sending session update finished")

async def _initialize_session(self) -> None:
def session_init_data(self) -> List[dict[str, Any]]:
"""Control initial session with OpenAI."""
session_update = {
"turn_detection": {"type": "server_vad"},
"voice": self._voice,
"modalities": ["audio", "text"],
"temperature": self._temperature,
}
await self.session_update(session_options=session_update)
return [{"type": "session.update", "session": session_update}]

async def _initialize_session(self) -> None: ...

@asynccontextmanager
async def connect(self) -> AsyncGenerator[None, None]:
Expand Down Expand Up @@ -374,9 +381,8 @@ async def connect(self) -> AsyncGenerator[None, None]:
json_data = response.json()
json_data["model"] = self._model
if self._websocket is not None:
await self._websocket.send_json({"type": "ag2.init", "config": json_data})
await asyncio.sleep(10)
await self._initialize_session()
session_init = self.session_init_data()
await self._websocket.send_json({"type": "ag2.init", "config": json_data, "init": session_init})
yield
finally:
pass
Expand Down
Loading
Loading