Willy/realtime (Chainlit#1401)
* Bump literalai dependency.

* Update literalai imports

* Remove unused imports from SQLAlchemy tests.

* Unit tests for LiteralDataLayer.

* Consistent LiteralAI to Chainlit conversion, resolve PaginatedResponse exceptions.

- Create LiteralToChainlitConverter class for handling conversions
- Implement methods for converting steps, threads, and attachments
- Add support for different Element subclasses based on metadata
- Allow manual setting of thread_id and id for Step and Element

* Attempt to satisfy mypy (plus cleaner approach).

* feat: add realtime audio

* fix: default config

* fix: lint

---------

Co-authored-by: Mathijs de Bruin <[email protected]>
Co-authored-by: EWouters <[email protected]>
3 people authored Oct 4, 2024
1 parent 79639b6 commit 8882619
Showing 36 changed files with 2,007 additions and 400 deletions.
7 changes: 5 additions & 2 deletions backend/chainlit/__init__.py
@@ -43,7 +43,7 @@
)
from chainlit.step import Step, step
from chainlit.sync import make_async, run_sync
from chainlit.types import AudioChunk, ChatProfile, Starter
from chainlit.types import InputAudioChunk, OutputAudioChunk, ChatProfile, Starter
from chainlit.user import PersistedUser, User
from chainlit.user_session import user_session
from chainlit.utils import make_module_getattr
@@ -56,6 +56,7 @@
author_rename,
header_auth_callback,
oauth_callback,
on_audio_start,
on_audio_chunk,
on_audio_end,
on_chat_end,
@@ -117,7 +118,8 @@ def acall(self):
"user_session",
"chat_context",
"CopilotFunction",
"AudioChunk",
"InputAudioChunk",
"OutputAudioChunk",
"Action",
"User",
"PersistedUser",
@@ -176,6 +178,7 @@ def acall(self):
"set_chat_profiles",
"set_starters",
"on_chat_end",
"on_audio_start",
"on_audio_chunk",
"on_audio_end",
"author_rename",
17 changes: 13 additions & 4 deletions backend/chainlit/callbacks.py
@@ -209,13 +209,25 @@ def on_chat_end(func: Callable) -> Callable:
return func


@trace
def on_audio_start(func: Callable) -> Callable:
"""
Hook to react to the user initiating audio.
Returns:
Callable[[], Any]: The decorated hook.
"""

config.code.on_audio_start = wrap_user_function(func, with_task=False)
return func

@trace
def on_audio_chunk(func: Callable) -> Callable:
"""
Hook to react to the audio chunks being sent.
Args:
chunk (AudioChunk): The audio chunk being sent.
chunk (InputAudioChunk): The audio chunk being sent.
Returns:
Callable[[], Any]: The decorated hook.
@@ -230,9 +242,6 @@ def on_audio_end(func: Callable) -> Callable:
"""
Hook to react to the audio stream ending. This is called after the last audio chunk is sent.
Args:
elements (List[Element]): The files that were uploaded before starting the audio stream (if any).
Returns:
Callable[[], Any]: The decorated hook.
"""
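
For context, here is a minimal sketch of how an application could register the new audio hooks. The handler bodies are illustrative only (the `cl.Message` reply and the print are not part of this commit); the decorators themselves are exported from `chainlit/__init__.py` as shown above.

```python
import chainlit as cl
from chainlit.types import InputAudioChunk


@cl.on_audio_start
async def start_audio():
    # A truthy return value switches the audio connection "on" in the UI,
    # a falsy one keeps it "off" (see the audio_start handler in socket.py below).
    return True


@cl.on_audio_chunk
async def handle_audio_chunk(chunk: InputAudioChunk):
    # Each inbound chunk carries isStart, mimeType, elapsedTime and raw bytes.
    print(chunk.isStart, chunk.mimeType, chunk.elapsedTime, len(chunk.data))


@cl.on_audio_end
async def end_audio():
    # The hook no longer receives uploaded file elements; it now takes no arguments.
    await cl.Message(content="Audio stream ended.").send()
```
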
27 changes: 6 additions & 21 deletions backend/chainlit/config.py
@@ -28,9 +28,8 @@

if TYPE_CHECKING:
from chainlit.action import Action
from chainlit.element import ElementBased
from chainlit.message import Message
from chainlit.types import AudioChunk, ChatProfile, Starter, ThreadDict
from chainlit.types import InputAudioChunk, ChatProfile, Starter, ThreadDict
from chainlit.user import User
from fastapi import Request, Response

@@ -93,18 +92,8 @@
max_size_mb = 500
[features.audio]
# Threshold for audio recording
min_decibels = -45
# Delay for the user to start speaking in MS
initial_silence_timeout = 3000
# Delay for the user to continue speaking in MS. If the user stops speaking for this duration, the recording will stop.
silence_timeout = 1500
# Above this duration (MS), the recording will forcefully stop.
max_duration = 15000
# Duration of the audio chunks in MS
chunk_duration = 1000
# Sample rate of the audio
sample_rate = 44100
sample_rate = 24000
[UI]
# Name of the assistant.
@@ -237,12 +226,7 @@ class SpontaneousFileUploadFeature(DataClassJsonMixin):

@dataclass
class AudioFeature(DataClassJsonMixin):
min_decibels: int = -45
initial_silence_timeout: int = 2000
silence_timeout: int = 1500
chunk_duration: int = 1000
max_duration: int = 15000
sample_rate: int = 44100
sample_rate: int = 24000
enabled: bool = False


@@ -297,8 +281,9 @@ class CodeSettings:
on_chat_end: Optional[Callable[[], Any]] = None
on_chat_resume: Optional[Callable[["ThreadDict"], Any]] = None
on_message: Optional[Callable[["Message"], Any]] = None
on_audio_chunk: Optional[Callable[["AudioChunk"], Any]] = None
on_audio_end: Optional[Callable[[List["ElementBased"]], Any]] = None
on_audio_start: Optional[Callable[[], Any]] = None
on_audio_chunk: Optional[Callable[["InputAudioChunk"], Any]] = None
on_audio_end: Optional[Callable[[], Any]] = None

author_rename: Optional[Callable[[str], Awaitable[str]]] = None
on_settings_update: Optional[Callable[[Dict[str, Any]], Any]] = None
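
As a quick sketch, the slimmed-down audio settings can be inspected from Python like this (defaults match the dataclass above; the print is illustrative only):

```python
from chainlit.config import AudioFeature

# The audio feature now exposes only an enable flag and a sample rate; the old
# client-side silence-detection knobs (min_decibels, initial_silence_timeout,
# silence_timeout, chunk_duration, max_duration) are gone.
audio = AudioFeature()
print(audio.sample_rate, audio.enabled)  # 24000 False
```
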
25 changes: 25 additions & 0 deletions backend/chainlit/emitter.py
@@ -17,6 +17,7 @@
FileReference,
MessagePayload,
ThreadDict,
OutputAudioChunk
)
from chainlit.user import PersistedUser
from literalai.helper import utc_now
@@ -51,6 +52,18 @@ async def resume_thread(self, thread_dict: ThreadDict):
async def send_element(self, element_dict: ElementDict):
"""Stub method to send an element to the UI."""
pass

async def update_audio_connection(self, state: Literal["on", "off"]):
"""Audio connection signaling."""
pass

async def send_audio_chunk(self, chunk: OutputAudioChunk):
"""Stub method to send an audio chunk to the UI."""
pass

async def send_audio_interrupt(self):
"""Stub method to interrupt the current audio response."""
pass

async def send_step(self, step_dict: StepDict):
"""Stub method to send a message to the UI."""
@@ -157,6 +170,18 @@ def resume_thread(self, thread_dict: ThreadDict):
"""Send a thread to the UI to resume it"""
return self.emit("resume_thread", thread_dict)

async def update_audio_connection(self, state: Literal["on", "off"]):
"""Audio connection signaling."""
await self.emit("audio_connection", state)

async def send_audio_chunk(self, chunk: OutputAudioChunk):
"""Send an audio chunk to the UI."""
await self.emit("audio_chunk", chunk)

async def send_audio_interrupt(self):
"""Method to interrupt the current audio response."""
await self.emit("audio_interrupt", {})

async def send_element(self, element_dict: ElementDict):
"""Stub method to send an element to the UI."""
await self.emit("element", element_dict)
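
A hedged sketch of how the new emitter methods might be used from application code, assuming the active emitter is reachable as `cl.context.emitter`; the `track` and `mimeType` values are placeholders, not values mandated by this commit:

```python
import chainlit as cl
from chainlit.types import OutputAudioChunk


async def stream_reply_audio(pcm_bytes: bytes) -> None:
    # Push a chunk of synthesized audio to the browser over the websocket.
    chunk = OutputAudioChunk(track="response-1", mimeType="pcm16", data=pcm_bytes)
    await cl.context.emitter.send_audio_chunk(chunk)


async def interrupt_reply_audio() -> None:
    # Stop whatever audio the UI is currently playing.
    await cl.context.emitter.send_audio_interrupt()
```

The third new method, `update_audio_connection`, is driven by the `audio_start` socket handler below rather than by user code.
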
36 changes: 19 additions & 17 deletions backend/chainlit/socket.py
@@ -18,9 +18,8 @@
from chainlit.session import WebsocketSession
from chainlit.telemetry import trace_event
from chainlit.types import (
AudioChunk,
AudioChunkPayload,
AudioEndPayload,
InputAudioChunk,
InputAudioChunkPayload,
MessagePayload,
)
from chainlit.user_session import user_sessions
@@ -314,19 +313,31 @@ async def message(sid, payload: MessagePayload):
session.current_task = task


@sio.on("audio_start")
async def audio_start(sid):
"""Handle audio init."""
session = WebsocketSession.require(sid)

context = init_ws_context(session)
if config.code.on_audio_start:
connected = bool(await config.code.on_audio_start())
connection_state = "on" if connected else "off"
await context.emitter.update_audio_connection(connection_state)


@sio.on("audio_chunk")
async def audio_chunk(sid, payload: AudioChunkPayload):
async def audio_chunk(sid, payload: InputAudioChunkPayload):
"""Handle an audio chunk sent by the user."""
session = WebsocketSession.require(sid)

init_ws_context(session)

if config.code.on_audio_chunk:
asyncio.create_task(config.code.on_audio_chunk(AudioChunk(**payload)))
asyncio.create_task(config.code.on_audio_chunk(InputAudioChunk(**payload)))


@sio.on("audio_end")
async def audio_end(sid, payload: AudioEndPayload):
async def audio_end(sid):
"""Handle the end of the audio stream."""
session = WebsocketSession.require(sid)
try:
@@ -337,18 +348,9 @@ async def audio_end(sid, payload: AudioEndPayload):
session.has_first_interaction = True
asyncio.create_task(context.emitter.init_thread("audio"))

file_elements = []
if config.code.on_audio_end:
file_refs = payload.get("fileReferences")
if file_refs:
files = [
session.files[file["id"]]
for file in file_refs
if file["id"] in session.files
]
file_elements = [Element.from_dict(file) for file in files]

await config.code.on_audio_end(file_elements)
await config.code.on_audio_end()

except asyncio.CancelledError:
pass
except Exception as e:
3 changes: 2 additions & 1 deletion backend/chainlit/translations/en-US.json
@@ -124,7 +124,8 @@
},
"speechButton": {
"start": "Start recording",
"stop": "Stop recording"
"stop": "Stop recording",
"loading": "Connecting"
},
"SubmitButton": {
"sendMessage": "Send message",
12 changes: 6 additions & 6 deletions backend/chainlit/types.py
@@ -154,24 +154,24 @@ class MessagePayload(TypedDict):
fileReferences: Optional[List[FileReference]]


class AudioChunkPayload(TypedDict):
class InputAudioChunkPayload(TypedDict):
isStart: bool
mimeType: str
elapsedTime: float
data: bytes


@dataclass
class AudioChunk:
class InputAudioChunk:
isStart: bool
mimeType: str
elapsedTime: float
data: bytes


class AudioEndPayload(TypedDict):
fileReferences: Optional[List[FileReference]]

class OutputAudioChunk(TypedDict):
track: str
mimeType: str
data: bytes

@dataclass
class AskFileResponse:
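
To make the renamed chunk types concrete, a small sketch of both payload shapes (the literal values are illustrative only):

```python
from chainlit.types import InputAudioChunk, OutputAudioChunk

# Inbound: a dataclass, built by the audio_chunk socket handler from the
# websocket payload.
inbound = InputAudioChunk(
    isStart=True,
    mimeType="audio/webm",
    elapsedTime=0.0,
    data=b"\x00\x01",
)

# Outbound: a TypedDict, i.e. a plain dict at runtime, consumed by the
# emitter's send_audio_chunk method shown above.
outbound: OutputAudioChunk = {
    "track": "response-1",
    "mimeType": "pcm16",
    "data": b"\x00\x01",
}
```
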
2 changes: 1 addition & 1 deletion backend/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "chainlit"
version = "1.3.0rc0"
version = "1.3.0rc1"
keywords = [
'LLM',
'Agents',
2 changes: 1 addition & 1 deletion frontend/src/assets/microphone.tsx
@@ -15,7 +15,7 @@ const MicrophoneIcon = (props: SvgIconProps) => {
>
<path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z" />
<path d="M19 10v2a7 7 0 0 1-14 0v-2" />
<line x1="12" x2="12" y1="19" y2="22" />{' '}
<line x1="12" x2="12" y1="19" y2="22" />
</SvgIcon>
);
};
26 changes: 26 additions & 0 deletions frontend/src/assets/microphoneOff.tsx
@@ -0,0 +1,26 @@
import SvgIcon, { SvgIconProps } from '@mui/material/SvgIcon';

const MicrophoneOffIcon = (props: SvgIconProps) => {
return (
<SvgIcon
{...props}
style={{
strokeLinecap: 'round',
strokeLinejoin: 'round',
strokeWidth: 2,
fill: 'none',
stroke: 'currentColor'
}}
viewBox="0 0 24 24"
>
<line x1="2" x2="22" y1="2" y2="22" />
<path d="M18.89 13.23A7.12 7.12 0 0 0 19 12v-2" />
<path d="M5 10v2a7 7 0 0 0 12 5" />
<path d="M15 9.34V5a3 3 0 0 0-5.68-1.33" />
<path d="M9 9v3a3 3 0 0 0 5.12 2.12" />
<line x1="12" x2="12" y1="19" y2="22" />
</SvgIcon>
);
};

export default MicrophoneOffIcon;
2 changes: 2 additions & 0 deletions frontend/src/components/molecules/messages/Messages.tsx
@@ -62,6 +62,7 @@ const Messages = memo(
<>
{m.steps?.length ? (
<Messages
key={m.id}
messages={m.steps}
elements={elements}
actions={actions}
@@ -71,6 +72,7 @@
/>
) : null}
<MessageLoader
key={m.id + 'loader'}
show={showToolCoTLoader || showHiddenCoTLoader}
/>
</>