Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes for Agent API GA #497

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ venv/
venv.bak/
.vscode/
.DS_Store
Pipfile
Pipfile.lock

# python artifacts
__pycache__
Expand All @@ -18,3 +20,4 @@ dist/
# build
build/
poetry.lock

55 changes: 54 additions & 1 deletion deepgram/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from .errors import DeepgramApiKeyError

# listen/read/speak/agent routers
from .client import Listen, Read
from .client import ListenRouter, ReadRouter, SpeakRouter, AgentRouter

# common
from .client import (
Expand Down Expand Up @@ -302,6 +302,59 @@
AsyncSelfHostedClient,
)


# agent
from .client import AgentWebSocketEvents

# websocket
from .client import (
AgentWebSocketClient,
AsyncAgentWebSocketClient,
)

from .client import (
#### common websocket response
# OpenResponse,
# CloseResponse,
# ErrorResponse,
# UnhandledResponse,
#### unique
WelcomeResponse,
SettingsAppliedResponse,
ConversationTextResponse,
UserStartedSpeakingResponse,
AgentThinkingResponse,
FunctionCalling,
FunctionCallRequest,
AgentStartedSpeakingResponse,
AgentAudioDoneResponse,
)

from .client import (
# top level
SettingsConfigurationOptions,
UpdateInstructionsOptions,
UpdateSpeakOptions,
InjectAgentMessageOptions,
FunctionCallResponse,
AgentKeepAlive,
# sub level
Listen,
Speak,
Header,
Item,
Properties,
Parameters,
Function,
Provider,
Think,
Agent,
Input,
Output,
Audio,
Context,
)

# utilities
# pylint: disable=wrong-import-position
from .audio import Microphone, DeepgramMicrophoneError
Expand Down
1 change: 1 addition & 0 deletions deepgram/audio/microphone/microphone.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import logging

from ...utils import verboselogs

from .constants import LOGGING, CHANNELS, RATE, CHUNK

if TYPE_CHECKING:
Expand Down
4 changes: 3 additions & 1 deletion deepgram/audio/speaker/speaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ class Speaker: # pylint: disable=too-many-instance-attributes
# _asyncio_loop: asyncio.AbstractEventLoop
# _asyncio_thread: threading.Thread
_receiver_thread: Optional[threading.Thread] = None

_loop: Optional[asyncio.AbstractEventLoop] = None

_push_callback_org: Optional[Callable] = None
Expand Down Expand Up @@ -265,6 +264,7 @@ async def _start_asyncio_receiver(self):
await self._push_callback(message)
elif isinstance(message, bytes):
self._logger.verbose("Received audio data...")
await self._push_callback(message)
self.add_audio_to_queue(message)
except websockets.exceptions.ConnectionClosedOK as e:
self._logger.debug("send() exiting gracefully: %d", e.code)
Expand Down Expand Up @@ -297,6 +297,7 @@ def _start_threaded_receiver(self):
self._push_callback(message)
elif isinstance(message, bytes):
self._logger.verbose("Received audio data...")
self._push_callback(message)
self.add_audio_to_queue(message)
except Exception as e: # pylint: disable=broad-except
self._logger.notice("_start_threaded_receiver exception: %s", str(e))
Expand Down Expand Up @@ -365,6 +366,7 @@ def _play(self, audio_out, stream, stop):
"LastPlay delta is greater than threshold. Unmute!"
)
self._microphone.unmute()

data = audio_out.get(True, TIMEOUT)
with self._lock_wait:
self._last_datagram = datetime.now()
Expand Down
69 changes: 65 additions & 4 deletions deepgram/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
)

# listen/read/speak/agent routers
from .clients import Listen, Read, Speak
from .clients import ListenRouter, ReadRouter, SpeakRouter, AgentRouter

# speech-to-text
from .clients import LiveClient, AsyncLiveClient # backward compat
Expand Down Expand Up @@ -308,6 +308,60 @@
AsyncSelfHostedClient,
)


# agent
from .clients import AgentWebSocketEvents

# websocket
from .clients import (
AgentWebSocketClient,
AsyncAgentWebSocketClient,
)

from .clients import (
#### common websocket response
# OpenResponse,
# CloseResponse,
# ErrorResponse,
# UnhandledResponse,
#### unique
WelcomeResponse,
SettingsAppliedResponse,
ConversationTextResponse,
UserStartedSpeakingResponse,
AgentThinkingResponse,
FunctionCalling,
FunctionCallRequest,
AgentStartedSpeakingResponse,
AgentAudioDoneResponse,
)

from .clients import (
# top level
SettingsConfigurationOptions,
UpdateInstructionsOptions,
UpdateSpeakOptions,
InjectAgentMessageOptions,
FunctionCallResponse,
AgentKeepAlive,
# sub level
Listen,
Speak,
Header,
Item,
Properties,
Parameters,
Function,
Provider,
Think,
Agent,
Input,
Output,
Audio,
Context,
)


# client errors and options
from .options import DeepgramClientOptions, ClientOptionsFromEnv
from .errors import DeepgramApiKeyError
Expand Down Expand Up @@ -397,21 +451,21 @@ def listen(self):
"""
Returns a Listen dot-notation router for interacting with Deepgram's transcription services.
"""
return Listen(self._config)
return ListenRouter(self._config)

@property
def read(self):
"""
Returns a Read dot-notation router for interacting with Deepgram's read services.
"""
return Read(self._config)
return ReadRouter(self._config)

@property
def speak(self):
"""
Returns a Speak dot-notation router for interacting with Deepgram's speak services.
"""
return Speak(self._config)
return SpeakRouter(self._config)

@property
@deprecation.deprecated(
Expand Down Expand Up @@ -480,6 +534,13 @@ def asyncselfhosted(self):
"""
return self.Version(self._config, "asyncselfhosted")

@property
def agent(self):
"""
Returns an Agent dot-notation router for interacting with Deepgram's agent services.
"""
return AgentRouter(self._config)

jpvajda marked this conversation as resolved.
Show resolved Hide resolved
# INTERNAL CLASSES
class Version:
"""
Expand Down
59 changes: 56 additions & 3 deletions deepgram/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@
)
from .errors import DeepgramModuleError

from .listen_router import Listen
from .read_router import Read
from .speak_router import Speak
from .listen_router import ListenRouter
from .read_router import ReadRouter
from .speak_router import SpeakRouter
from .agent_router import AgentRouter

# listen
from .listen import LiveTranscriptionEvents
Expand Down Expand Up @@ -318,3 +319,55 @@
SelfHostedClient,
AsyncSelfHostedClient,
)

# agent
from .agent import AgentWebSocketEvents

# websocket
from .agent import (
AgentWebSocketClient,
AsyncAgentWebSocketClient,
)

from .agent import (
#### common websocket response
# OpenResponse,
# CloseResponse,
# ErrorResponse,
# UnhandledResponse,
#### unique
WelcomeResponse,
SettingsAppliedResponse,
ConversationTextResponse,
UserStartedSpeakingResponse,
AgentThinkingResponse,
FunctionCalling,
FunctionCallRequest,
AgentStartedSpeakingResponse,
AgentAudioDoneResponse,
)

from .agent import (
# top level
SettingsConfigurationOptions,
UpdateInstructionsOptions,
UpdateSpeakOptions,
InjectAgentMessageOptions,
FunctionCallResponse,
AgentKeepAlive,
# sub level
Listen,
Speak,
Header,
Item,
Properties,
Parameters,
Function,
Provider,
Think,
Agent,
Input,
Output,
Audio,
Context,
)
54 changes: 54 additions & 0 deletions deepgram/clients/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT

from .enums import AgentWebSocketEvents

# websocket
from .client import (
AgentWebSocketClient,
AsyncAgentWebSocketClient,
)

from .client import (
#### common websocket response
OpenResponse,
CloseResponse,
ErrorResponse,
UnhandledResponse,
#### unique
WelcomeResponse,
SettingsAppliedResponse,
ConversationTextResponse,
UserStartedSpeakingResponse,
AgentThinkingResponse,
FunctionCalling,
FunctionCallRequest,
AgentStartedSpeakingResponse,
AgentAudioDoneResponse,
)

from .client import (
# top level
SettingsConfigurationOptions,
UpdateInstructionsOptions,
UpdateSpeakOptions,
InjectAgentMessageOptions,
FunctionCallResponse,
AgentKeepAlive,
# sub level
Listen,
Speak,
Header,
Item,
Properties,
Parameters,
Function,
Provider,
Think,
Agent,
Input,
Output,
Audio,
Context,
)
Loading