From 6517a934725a3bf136d406211e4484d64d49f058 Mon Sep 17 00:00:00 2001
From: Freddy Boulton <alfonsoboulton@gmail.com>
Date: Tue, 4 Mar 2025 18:08:10 -0500
Subject: [PATCH] Clean up cancelled generators (#124)

* fix links

* fix upload

* add code

* Add code

---------

Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local>
---
 backend/fastrtc/reply_on_pause.py | 25 ++++++++++++++++++++++++-
 demo/llm_voice_chat/app.py        | 15 +++++++++------
 demo/talk_to_smolagents/README.md |  2 +-
 docs/cookbook.md                  |  4 +---
 docs/userguide/audio.md           | 25 ++++++++++++++++++++++---
 pyproject.toml                    |  2 +-
 upload_space.py                   |  2 +-
 7 files changed, 59 insertions(+), 16 deletions(-)

diff --git a/backend/fastrtc/reply_on_pause.py b/backend/fastrtc/reply_on_pause.py
index 2591391..78fedd0 100644
--- a/backend/fastrtc/reply_on_pause.py
+++ b/backend/fastrtc/reply_on_pause.py
@@ -116,7 +116,9 @@ def __init__(
         self.is_async = inspect.isasyncgenfunction(fn)
         self.event = Event()
         self.state = AppState()
-        self.generator: Generator[EmitType, None, None] | None = None
+        self.generator: (
+            Generator[EmitType, None, None] | AsyncGenerator[EmitType, None] | None
+        ) = None
         self.model_options = model_options
         self.algo_options = algo_options or AlgoOptions()
 
@@ -184,8 +186,29 @@ def receive(self, frame: tuple[int, np.ndarray]) -> None:
             self.event.set()
             if self.can_interrupt:
                 self.clear_queue()
+                self._close_generator()
                 self.generator = None
 
+    def _close_generator(self):
+        """Properly close the generator to ensure resources are released."""
+        if self.generator is None:
+            return
+
+        try:
+            if self.is_async:
+                # For async generators, we need to call aclose()
+                if hasattr(self.generator, "aclose"):
+                    asyncio.run_coroutine_threadsafe(
+                        cast(AsyncGenerator[EmitType, None], self.generator).aclose(),
+                        self.loop,
+                    ).result(timeout=1.0)  # Add timeout to prevent blocking
+            else:
+                # For sync generators, we can just exhaust it or close it
+                if hasattr(self.generator, "close"):
+                    cast(Generator[EmitType, None, None], self.generator).close()
+        except Exception as e:
+            logger.debug(f"Error closing generator: {e}")
+
     def reset(self):
         super().reset()
         if self.phone_mode:
diff --git a/demo/llm_voice_chat/app.py b/demo/llm_voice_chat/app.py
index bbf8647..9de3c89 100644
--- a/demo/llm_voice_chat/app.py
+++ b/demo/llm_voice_chat/app.py
@@ -49,14 +49,17 @@ def response(
     )
 
     chatbot.append({"role": "assistant", "content": response_text})
-    yield AdditionalOutputs(chatbot)
 
-    for chunk in tts_client.text_to_speech.convert_as_stream(
-        text=response_text,  # type: ignore
-        voice_id="JBFqnCBsd6RMkjVDRZzb",
-        model_id="eleven_multilingual_v2",
-        output_format="pcm_24000",
+    for i, chunk in enumerate(
+        tts_client.text_to_speech.convert_as_stream(
+            text=response_text,  # type: ignore
+            voice_id="JBFqnCBsd6RMkjVDRZzb",
+            model_id="eleven_multilingual_v2",
+            output_format="pcm_24000",
+        )
     ):
+        if i == 0:
+            yield AdditionalOutputs(chatbot)
         audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
         yield (24000, audio_array)
 
diff --git a/demo/talk_to_smolagents/README.md b/demo/talk_to_smolagents/README.md
index 8a3b806..79c593c 100644
--- a/demo/talk_to_smolagents/README.md
+++ b/demo/talk_to_smolagents/README.md
@@ -9,7 +9,7 @@ app_file: app.py
 pinned: false
 license: mit
 short_description: FastRTC Voice Agent with smolagents
-tags: [webrtc, websocket, gradio, secret|HF_TOKEN]
+tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN]
 ---
 
 # Voice LLM Agent with Image Generation
diff --git a/docs/cookbook.md b/docs/cookbook.md
index b3f8c52..d2dc6aa 100644
--- a/docs/cookbook.md
+++ b/docs/cookbook.md
@@ -140,10 +140,8 @@ document.querySelectorAll('.tag-button').forEach(button => {
 
     <video width=98% src="https://github.com/user-attachments/assets/ddf39ef7-fa7b-417e-8342-de3b9e311891" controls style="text-align: center"></video>
 
-    [:octicons-arrow-right-24: Demo](https://huggingface.co/spaces/fastrtc/talk-to-claude)
+    [:octicons-arrow-right-24: Demo](https://huggingface.co/spaces/burtenshaw/coworking_agent/)
 
-    [:octicons-arrow-right-24: Gradio UI](https://huggingface.co/spaces/fastrtc/talk-to-claude-gradio)
-    
     [:octicons-code-16: Code](https://huggingface.co/spaces/burtenshaw/coworking_agent/blob/main/app.py)
 
 -   :speaking_head:{ .lg .middle } __Talk to Claude__
diff --git a/docs/userguide/audio.md b/docs/userguide/audio.md
index 49357ca..284571f 100644
--- a/docs/userguide/audio.md
+++ b/docs/userguide/audio.md
@@ -3,7 +3,6 @@
 
 Typically, you want to run a python function whenever a user has stopped speaking. This can be done by wrapping a python generator with the `ReplyOnPause` class and passing it to the `handler` argument of the `Stream` object. The `ReplyOnPause` class will handle the voice detection and turn taking logic automatically!
 
-By default, the `ReplyOnPause` handler will allow you to interrupt the response at any time by speaking again. If you do not want to allow interruption, you can set the `can_interrupt` parameter to `False`.
 
 === "Code"
     ```python
@@ -35,14 +34,13 @@ By default, the `ReplyOnPause` handler will allow you to interrupt the response
     You can also use an async generator with `ReplyOnPause`.
 
 !!! tip "Parameters"
-    You can customize the voice detection parameters by passing in `algo_options` and `model_options` to the `ReplyOnPause` class. Also, you can set the `can_interrupt` parameter to `False` to prevent the user from interrupting the response. By default, `can_interrupt` is `True`.
+    You can customize the voice detection parameters by passing in `algo_options` and `model_options` to the `ReplyOnPause` class.
     ```python
     from fastrtc import AlgoOptions, SileroVadOptions
 
     stream = Stream(
         handler=ReplyOnPause(
             response,
-            can_interrupt=True,
             algo_options=AlgoOptions(
                 audio_chunk_duration=0.6,
                 started_talking_threshold=0.2,
@@ -57,6 +55,27 @@ By default, the `ReplyOnPause` handler will allow you to interrupt the response
     )
     ```
 
+### Interruptions
+
+By default, the `ReplyOnPause` handler will allow you to interrupt the response at any time by speaking again. If you do not want to allow interruption, you can set the `can_interrupt` parameter to `False`.
+
+```python
+from fastrtc import Stream, ReplyOnPause
+
+stream = Stream(
+    handler=ReplyOnPause(
+        response,
+        can_interrupt=True,
+    )
+)
+```
+
+<video width=98% src="https://github.com/user-attachments/assets/dba68dd7-7444-439b-b948-59171067e850" controls style="text-align: center"></video>
+
+
+!!! tip "Muting Response Audio"
+    You can directly talk over the output audio and the interruption will still work. However, in these cases, the audio transcription may be incorrect. To prevent this, it's best practice to mute the output audio before talking over it.
+
 ## Reply On Stopwords
 
 You can configure your AI model to run whenever a set of "stop words" are detected, like "Hey Siri" or "computer", with the `ReplyOnStopWords` class. 
diff --git a/pyproject.toml b/pyproject.toml
index fa4f102..c69434f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "fastrtc"
-version = "0.0.11"
+version = "0.0.12"
 description = "The realtime communication library for Python"
 readme = "README.md"
 license = "apache-2.0"
diff --git a/upload_space.py b/upload_space.py
index 3e72229..088d93d 100644
--- a/upload_space.py
+++ b/upload_space.py
@@ -95,7 +95,7 @@ def upload_space(dir_path: str):
 
     readme_path = path / "README.md"
 
-    if path.name not in NO_GRADIO_SPACE:
+    if path.name not in NO_GRADIO_SPACE and (path / "README_gradio.md").exists():
         try:
             # Upload Gradio version with modified README
             api.upload_folder(