freddyaboulton · freddyaboulton · Mar 4, 2025 · Mar 4, 2025 · Mar 4, 2025 · Mar 4, 2025
diff --git a/backend/fastrtc/reply_on_pause.py b/backend/fastrtc/reply_on_pause.py
@@ -116,7 +116,9 @@ def __init__(
         self.is_async = inspect.isasyncgenfunction(fn)
         self.event = Event()
         self.state = AppState()
-        self.generator: Generator[EmitType, None, None] | None = None
+        self.generator: (
+            Generator[EmitType, None, None] | AsyncGenerator[EmitType, None] | None
+        ) = None
         self.model_options = model_options
         self.algo_options = algo_options or AlgoOptions()
 
@@ -184,8 +186,29 @@ def receive(self, frame: tuple[int, np.ndarray]) -> None:
             self.event.set()
             if self.can_interrupt:
                 self.clear_queue()
+                self._close_generator()
                 self.generator = None
 
+    def _close_generator(self):
+        """Close the active generator, if any; Exceptions are logged, not raised."""
+        if self.generator is None:
+            return
+
+        try:
+            if self.is_async:
+                # aclose() is a coroutine: schedule it on self.loop and wait for it
+                if hasattr(self.generator, "aclose"):
+                    asyncio.run_coroutine_threadsafe(
+                        cast(AsyncGenerator[EmitType, None], self.generator).aclose(),
+                        self.loop,
+                    ).result(timeout=1.0)  # Wait at most 1 second for aclose()
+            else:
+                # For sync generators, close() is called directly (no exhausting)
+                if hasattr(self.generator, "close"):
+                    cast(Generator[EmitType, None, None], self.generator).close()
+        except Exception as e:  # Best effort: includes a timeout from result()
+            logger.debug(f"Error closing generator: {e}")
+
     def reset(self):
         super().reset()
         if self.phone_mode:

diff --git a/demo/llm_voice_chat/app.py b/demo/llm_voice_chat/app.py
@@ -49,14 +49,17 @@ def response(
     )
 
     chatbot.append({"role": "assistant", "content": response_text})
-    yield AdditionalOutputs(chatbot)
 
-    for chunk in tts_client.text_to_speech.convert_as_stream(
-        text=response_text,  # type: ignore
-        voice_id="JBFqnCBsd6RMkjVDRZzb",
-        model_id="eleven_multilingual_v2",
-        output_format="pcm_24000",
+    for i, chunk in enumerate(
+        tts_client.text_to_speech.convert_as_stream(
+            text=response_text,  # type: ignore
+            voice_id="JBFqnCBsd6RMkjVDRZzb",
+            model_id="eleven_multilingual_v2",
+            output_format="pcm_24000",
+        )
     ):
+        if i == 0:
+            yield AdditionalOutputs(chatbot)
         audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
         yield (24000, audio_array)
 

diff --git a/demo/talk_to_smolagents/README.md b/demo/talk_to_smolagents/README.md
@@ -9,7 +9,7 @@ app_file: app.py
 pinned: false
 license: mit
 short_description: FastRTC Voice Agent with smolagents
-tags: [webrtc, websocket, gradio, secret|HF_TOKEN]
+tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN]
 ---
 
 # Voice LLM Agent with Image Generation

diff --git a/docs/cookbook.md b/docs/cookbook.md
@@ -140,10 +140,8 @@ document.querySelectorAll('.tag-button').forEach(button => {
 
     <video width=98% src="https://github.com/user-attachments/assets/ddf39ef7-fa7b-417e-8342-de3b9e311891" controls style="text-align: center"></video>
 
-    [:octicons-arrow-right-24: Demo](https://huggingface.co/spaces/fastrtc/talk-to-claude)
+    [:octicons-arrow-right-24: Demo](https://huggingface.co/spaces/burtenshaw/coworking_agent/)
 
-    [:octicons-arrow-right-24: Gradio UI](https://huggingface.co/spaces/fastrtc/talk-to-claude-gradio)
-
     [:octicons-code-16: Code](https://huggingface.co/spaces/burtenshaw/coworking_agent/blob/main/app.py)
 
 -   :speaking_head:{ .lg .middle } __Talk to Claude__

diff --git a/docs/userguide/audio.md b/docs/userguide/audio.md
@@ -3,7 +3,6 @@
 
 Typically, you want to run a python function whenever a user has stopped speaking. This can be done by wrapping a python generator with the `ReplyOnPause` class and passing it to the `handler` argument of the `Stream` object. The `ReplyOnPause` class will handle the voice detection and turn taking logic automatically!
 
-By default, the `ReplyOnPause` handler will allow you to interrupt the response at any time by speaking again. If you do not want to allow interruption, you can set the `can_interrupt` parameter to `False`.
 
 === "Code"
     ```python
@@ -35,14 +34,13 @@ By default, the `ReplyOnPause` handler will allow you to interrupt the response
     You can also use an async generator with `ReplyOnPause`.
 
 !!! tip "Parameters"
-    You can customize the voice detection parameters by passing in `algo_options` and `model_options` to the `ReplyOnPause` class. Also, you can set the `can_interrupt` parameter to `False` to prevent the user from interrupting the response. By default, `can_interrupt` is `True`.
+    You can customize the voice detection parameters by passing in `algo_options` and `model_options` to the `ReplyOnPause` class.
     ```python
     from fastrtc import AlgoOptions, SileroVadOptions
 
     stream = Stream(
         handler=ReplyOnPause(
             response,
-            can_interrupt=True,
             algo_options=AlgoOptions(
                 audio_chunk_duration=0.6,
                 started_talking_threshold=0.2,
@@ -57,6 +55,27 @@ By default, the `ReplyOnPause` handler will allow you to interrupt the response
     )
     ```
 
+### Interruptions
+
+By default, the `ReplyOnPause` handler will allow you to interrupt the response at any time by speaking again. If you do not want to allow interruption, you can set the `can_interrupt` parameter to `False`. The example below passes the default value, `True`, explicitly.
+
+```python
+from fastrtc import Stream, ReplyOnPause
+
+stream = Stream(
+    handler=ReplyOnPause(
+        response,
+        can_interrupt=True,
+    )
+)
+```
+
+<video width=98% src="https://github.com/user-attachments/assets/dba68dd7-7444-439b-b948-59171067e850" controls style="text-align: center"></video>
+
+
+!!! tip "Muting Response Audio"
+    You can directly talk over the output audio and the interruption will still work. However, in these cases, the audio transcription may be incorrect. To prevent this, it's best practice to mute the output audio before talking over it.
+
 ## Reply On Stopwords
 
 You can configure your AI model to run whenever a set of "stop words" are detected, like "Hey Siri" or "computer", with the `ReplyOnStopWords` class. 

diff --git a/pyproject.toml b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "fastrtc"
-version = "0.0.11"
+version = "0.0.12"
 description = "The realtime communication library for Python"
 readme = "README.md"
 license = "apache-2.0"

diff --git a/upload_space.py b/upload_space.py
@@ -95,7 +95,7 @@ def upload_space(dir_path: str):
 
     readme_path = path / "README.md"
 
-    if path.name not in NO_GRADIO_SPACE:
+    if path.name not in NO_GRADIO_SPACE and (path / "README_gradio.md").exists():
         try:
             # Upload Gradio version with modified README
             api.upload_folder(