Moonshine live (#86)

* add code * moonshine --------- Co-authored-by: Freddy Boulton <[email protected]>
freddyaboulton · Feb 26, 2025 · e44341d · e44341d
1 parent 4f46003
commit e44341d
Show file tree

Hide file tree

Showing 4 changed files with 83 additions and 0 deletions.
diff --git a/demo/moonshine_live/README.md b/demo/moonshine_live/README.md
@@ -0,0 +1,16 @@
+---
+title: Moonshine Live Transcription
+emoji: 🌕
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.17.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Real-time captions with Moonshine ONNX
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN]
+models: [onnx-community/moonshine-base-ONNX, UsefulSensors/moonshine-base]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
diff --git a/demo/moonshine_live/app.py b/demo/moonshine_live/app.py
@@ -0,0 +1,64 @@
+from fastrtc import (
+    Stream,
+    AdditionalOutputs,
+    audio_to_float32,
+    ReplyOnPause,
+    get_twilio_turn_credentials,
+)
+from functools import lru_cache
+import gradio as gr
+from typing import Generator, Literal
+from numpy.typing import NDArray
+import numpy as np
+from moonshine_onnx import MoonshineOnnxModel, load_tokenizer
+
+
+@lru_cache(maxsize=None)
+def load_moonshine(
+    model_name: Literal["moonshine/base", "moonshine/tiny"],
+) -> MoonshineOnnxModel:
+    return MoonshineOnnxModel(model_name=model_name)
+
+
+tokenizer = load_tokenizer()  # created once at import time; stt() reuses it to decode every result
+
+
+def stt(
+    audio: tuple[int, NDArray[np.int16 | np.float32]],
+    model_name: Literal["moonshine/base", "moonshine/tiny"],
+) -> Generator[AdditionalOutputs, None, None]:
+    moonshine = load_moonshine(model_name)
+    sr, audio_np = audio  # type: ignore
+    if audio_np.dtype == np.int16:
+        audio_np = audio_to_float32(audio)
+    if audio_np.ndim == 1:
+        audio_np = audio_np.reshape(1, -1)
+    tokens = moonshine.generate(audio_np)
+    yield AdditionalOutputs(tokenizer.decode_batch(tokens)[0])
+
+
+stream = Stream(  # module-level Stream object; its .ui is launched under the __main__ guard
+    ReplyOnPause(stt, input_sample_rate=16000),  # stt is the wrapped handler; input_sample_rate fixed at 16000
+    modality="audio",
+    mode="send",  # NOTE(review): presumably client-to-server audio only; confirm in fastrtc docs
+    ui_args={
+        "title": "Live Captions by Moonshine",
+        "icon": "default-favicon.ico",  # same file name as the .ico added under demo/moonshine_live/
+        "icon_button_color": "#5c5c5c",
+        "pulse_color": "#a7c6fc",
+        "icon_radius": 0,
+    },
+    rtc_configuration=get_twilio_turn_credentials(),  # evaluated once, when this module is imported
+    additional_inputs=[
+        gr.Radio(  # NOTE(review): presumably supplies stt's model_name argument; confirm
+            choices=["moonshine/base", "moonshine/tiny"],
+            value="moonshine/base",
+            label="Model",
+        )
+    ],
+    additional_outputs=[gr.Textbox(label="Captions")],
+    additional_outputs_handler=lambda prev, current: (prev + "\n" + current).strip(),  # append on a new line, then trim outer whitespace
+)
+
+if __name__ == "__main__":  # launch the UI only when run as a script, not when imported
+    stream.ui.launch()
diff --git a/demo/moonshine_live/default-favicon.ico b/demo/moonshine_live/default-favicon.ico
diff --git a/demo/moonshine_live/requirements.txt b/demo/moonshine_live/requirements.txt
@@ -0,0 +1,3 @@
+fastrtc[vad]
+useful-moonshine-onnx@git+https://git@github.com/usefulsensors/moonshine.git#subdirectory=moonshine-onnx
+twilio