Skip to content

Commit da75565

Browse files
committed
Squashed commit of the following:
commit a685e11 Author: Varshith B <[email protected]> Date: Mon Feb 17 22:46:25 2025 +0530 feat: add cleanup commit 0e4d8af Author: Varshith B <[email protected]> Date: Sun Feb 16 22:52:46 2025 +0530 fix: audio frame skipping commit 44df170 Author: Varshith B <[email protected]> Date: Sun Feb 16 22:18:23 2025 +0530 fix: frame skipping commit c3009c6 Author: Varshith B <[email protected]> Date: Thu Feb 13 18:49:26 2025 +0530 fix: dynamic prompt change commit 383105b Author: Elite Encoder <[email protected]> Date: Wed Feb 12 21:24:34 2025 +0000 fix prompt control panel rendering (wip) commit b372b10 Author: Elite Encoder <[email protected]> Date: Wed Feb 12 18:11:50 2025 +0000 add libsndfile1 commit 214cfec Merge: a0ed6f8 6f54863 Author: Varshith Bathini <[email protected]> Date: Tue Feb 11 22:43:18 2025 +0530 Merge branch 'main' into feat/audio_support commit a0ed6f8 Author: Varshith Bathini <[email protected]> Date: Fri Feb 7 01:54:42 2025 +0530 Apply suggestions from code review Co-authored-by: John | Elite Encoder <[email protected]> commit b85d01d Author: Varshith B <[email protected]> Date: Tue Feb 4 17:29:17 2025 +0530 fix: update prompts commit aa209f0 Merge: 6e134b7 af132be Author: Varshith B <[email protected]> Date: Mon Feb 3 23:01:48 2025 +0530 Merge branch 'feat/audio_support' of https://github.com/varshith15/comfystream into feat/audio_support commit 6e134b7 Author: Varshith B <[email protected]> Date: Mon Feb 3 23:00:16 2025 +0530 fix: pitch shift example commit 1c1959e Author: Varshith B <[email protected]> Date: Mon Feb 3 21:44:19 2025 +0530 fix: one-to-one mapping commit af132be Merge: 5e9e755 5051688 Author: Varshith Bathini <[email protected]> Date: Thu Jan 30 22:23:21 2025 +0530 Merge branch 'main' into feat/audio_support commit 5e9e755 Author: Varshith B <[email protected]> Date: Thu Jan 30 22:11:20 2025 +0530 fix: muted functionality commit 06faf49 Author: Varshith B <[email protected]> Date: Thu Jan 30 21:39:59 2025 +0530 fix: audio and video 
muting commit 5897596 Author: Varshith B <[email protected]> Date: Thu Jan 30 20:47:44 2025 +0530 fix: warmup issue fix commit 126fb3a Author: Varshith B <[email protected]> Date: Thu Jan 30 17:46:41 2025 +0530 fix: cleanup commit 2d04794 Author: Varshith B <[email protected]> Date: Thu Jan 30 17:14:19 2025 +0530 fix: audio nodes commit 6c529f5 Merge: b4f871a 9583a28 Author: Varshith B <[email protected]> Date: Wed Jan 29 15:13:56 2025 +0530 fix: merge conflicts commit b4f871a Author: Varshith B <[email protected]> Date: Mon Jan 27 23:54:10 2025 +0530 fix: load multiple workflows commit eb161c2 Author: Varshith B <[email protected]> Date: Mon Jan 27 22:54:32 2025 +0530 fix: combine workflows commit 415c387 Author: Varshith B <[email protected]> Date: Fri Jan 24 23:09:21 2025 +0530 feat: combine audio and video streams commit 49deb2f Author: Varshith B <[email protected]> Date: Mon Jan 20 13:12:52 2025 +0530 fix: server commit 2a3d086 Author: Varshith B <[email protected]> Date: Sun Dec 29 23:35:54 2024 +0530 temp: working state commit 21e4310 Author: Varshith B <[email protected]> Date: Sun Dec 29 16:45:45 2024 +0000 feat: streaming whisper commit 29f6bb7 Author: Varshith B <[email protected]> Date: Sat Dec 28 01:18:51 2024 +0530 feat: audio pipeline commit 960aebe Author: Varshith B <[email protected]> Date: Tue Dec 24 23:16:16 2024 +0530 fix: only hear remote audio commit 7035b03 Author: Varshith B <[email protected]> Date: Tue Dec 24 01:10:59 2024 +0530 fix: combine tracks commit 6dad03a Author: Varshith B <[email protected]> Date: Wed Dec 18 00:50:58 2024 +0530 fix: remove muted commit a484226 Author: Varshith B <[email protected]> Date: Sat Dec 14 20:25:52 2024 +0000 feat: ui changes to get audio commit f182502 Merge: 8613ea5 8a6b528 Author: Varshith B <[email protected]> Date: Sat Dec 14 19:08:53 2024 +0000 fix: merge conflicts commit 8613ea5 Merge: 743da71 fe8b261 Author: Varshith B <[email protected]> Date: Sat Dec 14 18:50:22 2024 +0000 fix: merge 
conflicts commit 743da71 Author: Varshith B <[email protected]> Date: Sat Dec 14 18:47:25 2024 +0000 fix: app commit fbe59f4 Author: Varshith B <[email protected]> Date: Sat Dec 14 18:45:19 2024 +0000 feat: whisper workflow commit fe8b261 Author: Varshith B <[email protected]> Date: Sat Dec 7 22:31:07 2024 +0530 feat: init
1 parent 497dcb3 commit da75565

21 files changed

+653
-234
lines changed

docker/Dockerfile.base

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -
99
wget \
1010
nano \
1111
socat \
12+
libsndfile1 \
1213
build-essential llvm tk-dev \
1314
&& rm -rf /var/lib/apt/lists/*
1415

nodes/audio_utils/__init__.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .load_audio_tensor import LoadAudioTensor
2+
from .save_audio_tensor import SaveAudioTensor
3+
from .pitch_shift import PitchShifter
4+
5+
# Registry of the audio utility nodes exported by this package, keyed by
# the node name used in workflows.
NODE_CLASS_MAPPINGS = {
    "LoadAudioTensor": LoadAudioTensor,
    "SaveAudioTensor": SaveAudioTensor,
    "PitchShifter": PitchShifter,
}

# Only the registry is part of the package's public surface.
__all__ = ["NODE_CLASS_MAPPINGS"]
+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import numpy as np
2+
3+
from comfystream import tensor_cache
4+
5+
class LoadAudioTensor:
    """Node that re-chunks incoming audio frames into fixed-duration buffers.

    Frames are pulled from ``tensor_cache.audio_inputs`` and their
    ``side_data.input`` arrays are concatenated until at least
    ``buffer_size`` milliseconds of samples are available.  Samples beyond
    the requested amount are kept in ``self.leftover`` for the next call.

    NOTE(review): ``frame.side_data.input`` is presumably a 1-D int16
    array (it is concatenated with ``dtype=np.int16`` and sliced along
    axis 0) -- confirm against the producer.
    """

    CATEGORY = "audio_utils"
    RETURN_TYPES = ("WAVEFORM", "INT")
    FUNCTION = "execute"

    def __init__(self):
        # Kept for backward compatibility; not read by this class.
        self.audio_buffer = np.empty(0, dtype=np.int16)
        # Samples received but not yet emitted.  Initialised here so the
        # attribute always exists, even before the first frame arrives.
        self.leftover = np.empty(0, dtype=np.int16)
        self.buffer_samples = None
        self.sample_rate = None

    @classmethod
    def INPUT_TYPES(s):
        """Declare the single required input: buffer length in milliseconds."""
        return {
            "required": {
                "buffer_size": ("FLOAT", {"default": 500.0}),
            }
        }

    @classmethod
    def IS_CHANGED(cls, **kwargs):
        """Return NaN so the node is always treated as changed.

        Accepts (and ignores) arbitrary keyword arguments so the hook can
        be called with or without the node's inputs.  NaN never compares
        equal to itself, so a cached value can never match.
        """
        return float("nan")

    def execute(self, buffer_size):
        """Return the next ``buffer_size`` milliseconds of audio.

        Args:
            buffer_size: Requested chunk duration in milliseconds.

        Returns:
            A ``(samples, sample_rate)`` tuple where ``samples`` holds
            ``int(sample_rate * buffer_size / 1000)`` samples.

        Raises:
            ValueError: If a frame arrives whose sample rate differs from
                the rate of the first frame.
        """
        if self.sample_rate is None or self.buffer_samples is None:
            # First call: the initial frame fixes the stream's sample rate.
            frame = tensor_cache.audio_inputs.get(block=True)
            self.sample_rate = frame.sample_rate
            self.leftover = frame.side_data.input

        # Recomputed on every call so that a changed ``buffer_size`` takes
        # effect instead of being silently ignored after the first call.
        self.buffer_samples = int(self.sample_rate * buffer_size / 1000)

        if self.leftover.shape[0] < self.buffer_samples:
            chunks = [self.leftover] if self.leftover.size > 0 else []
            total_samples = self.leftover.shape[0]

            # Block until enough frames have arrived to fill the buffer.
            while total_samples < self.buffer_samples:
                frame = tensor_cache.audio_inputs.get(block=True)
                if frame.sample_rate != self.sample_rate:
                    raise ValueError("Sample rate mismatch")
                chunks.append(frame.side_data.input)
                total_samples += frame.side_data.input.shape[0]

            merged_audio = np.concatenate(chunks, dtype=np.int16)
            buffered_audio = merged_audio[:self.buffer_samples]
            self.leftover = merged_audio[self.buffer_samples:]
        else:
            # Enough samples are already buffered; no queue access needed.
            buffered_audio = self.leftover[:self.buffer_samples]
            self.leftover = self.leftover[self.buffer_samples:]

        return buffered_audio, self.sample_rate

nodes/audio_utils/pitch_shift.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import numpy as np
2+
import librosa
3+
4+
class PitchShifter:
    """Node that shifts the pitch of an int16 waveform by a number of semitones."""

    CATEGORY = "audio_utils"
    RETURN_TYPES = ("WAVEFORM", "INT")
    FUNCTION = "execute"

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the waveform, its sample rate and the shift in semitones."""
        return {
            "required": {
                "audio": ("WAVEFORM",),
                "sample_rate": ("INT",),
                "pitch_shift": ("FLOAT", {
                    "default": 4.0,
                    "min": 0.0,
                    "max": 12.0,
                    "step": 0.5
                }),
            }
        }

    @classmethod
    def IS_CHANGED(cls, **kwargs):
        """Return NaN so the node is always treated as changed.

        Accepts (and ignores) the node's inputs as keyword arguments; a
        signature without them raises ``TypeError`` when inputs are passed.
        """
        return float("nan")

    def execute(self, audio, sample_rate, pitch_shift):
        """Pitch-shift ``audio`` and return it as int16 with its sample rate.

        Args:
            audio: Array of int16-range samples (converted via ``astype``).
            sample_rate: Sample rate passed to librosa as ``sr``.
            pitch_shift: Shift passed to librosa as ``n_steps``.

        Returns:
            A ``(shifted_int16, sample_rate)`` tuple.
        """
        # Nothing to shift: avoid handing an empty signal to librosa.
        if audio.size == 0:
            return audio.astype(np.int16), sample_rate

        # librosa works on floating-point audio; scale int16 into [-1, 1).
        audio_float = audio.astype(np.float32) / 32768.0
        shifted_audio = librosa.effects.pitch_shift(y=audio_float, sr=sample_rate, n_steps=pitch_shift)
        # Clip before casting so out-of-range values saturate rather than wrap.
        shifted_int16 = np.clip(shifted_audio * 32768.0, -32768, 32767).astype(np.int16)
        return shifted_int16, sample_rate
+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from comfystream import tensor_cache
2+
3+
class SaveAudioTensor:
    """Output node that publishes an audio waveform to ``tensor_cache.audio_outputs``."""

    CATEGORY = "audio_utils"
    RETURN_TYPES = ()
    FUNCTION = "execute"
    OUTPUT_NODE = True

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the single required ``audio`` waveform input."""
        return {
            "required": {
                "audio": ("WAVEFORM",)
            }
        }

    @classmethod
    def IS_CHANGED(cls, **kwargs):
        """Return NaN so the node is always treated as changed.

        Accepts (and ignores) the node's inputs as keyword arguments; a
        signature without them raises ``TypeError`` when inputs are passed.
        """
        return float("nan")

    def execute(self, audio):
        """Enqueue ``audio`` on the output queue without blocking.

        Returns:
            A one-element tuple containing the same ``audio`` object.
        """
        tensor_cache.audio_outputs.put_nowait(audio)
        return (audio,)
25+

nodes/tensor_utils/load_tensor.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@ def IS_CHANGED():
1515
return float("nan")
1616

1717
def execute(self):
18-
input = tensor_cache.inputs.pop()
19-
return (input,)
18+
frame = tensor_cache.image_inputs.get(block=True)
19+
frame.side_data.skipped = False
20+
return (frame.side_data.input,)

nodes/tensor_utils/save_tensor.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,5 @@ def IS_CHANGED(s):
2222
return float("nan")
2323

2424
def execute(self, images: torch.Tensor):
25-
fut = tensor_cache.outputs.pop()
26-
fut.set_result(images)
25+
tensor_cache.image_outputs.put_nowait(images)
2726
return images

server/app.py

+49-18
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,36 @@
3030

3131
class VideoStreamTrack(MediaStreamTrack):
    """Video track that feeds source frames to the pipeline and serves its output.

    A background task reads frames from the wrapped ``track`` and hands
    them to ``pipeline.put_video_frame``; ``recv`` returns whatever
    ``pipeline.get_processed_video_frame`` yields.
    """

    kind = "video"

    def __init__(self, track: MediaStreamTrack, pipeline):
        super().__init__()
        self.track = track
        self.pipeline = pipeline
        # Keep a strong reference to the task: the event loop only holds
        # weak references, so an unreferenced task may be garbage-collected
        # before it finishes.
        self._collect_task = asyncio.create_task(self.collect_frames())

    async def collect_frames(self):
        """Forward every frame received from the source track to the pipeline."""
        while True:
            frame = await self.track.recv()
            await self.pipeline.put_video_frame(frame)

    async def recv(self):
        """Return the next processed video frame from the pipeline."""
        return await self.pipeline.get_processed_video_frame()
46+
47+
48+
class AudioStreamTrack(MediaStreamTrack):
    """Audio track that feeds source frames to the pipeline and serves its output.

    A background task reads frames from the wrapped ``track`` and hands
    them to ``pipeline.put_audio_frame``; ``recv`` returns whatever
    ``pipeline.get_processed_audio_frame`` yields.
    """

    kind = "audio"

    def __init__(self, track: MediaStreamTrack, pipeline):
        super().__init__()
        self.track = track
        self.pipeline = pipeline
        # Keep a strong reference to the task: the event loop only holds
        # weak references, so an unreferenced task may be garbage-collected
        # before it finishes.
        self._collect_task = asyncio.create_task(self.collect_frames())

    async def collect_frames(self):
        """Forward every frame received from the source track to the pipeline."""
        while True:
            frame = await self.track.recv()
            await self.pipeline.put_audio_frame(frame)

    async def recv(self):
        """Return the next processed audio frame from the pipeline."""
        return await self.pipeline.get_processed_audio_frame()
4263

4364

4465
def force_codec(pc, sender, forced_codec):
@@ -87,8 +108,7 @@ async def offer(request):
87108

88109
params = await request.json()
89110

90-
pipeline.set_prompt(params["prompt"])
91-
await pipeline.warm()
111+
await pipeline.set_prompts(params["prompts"])
92112

93113
offer_params = params["offer"]
94114
offer = RTCSessionDescription(sdp=offer_params["sdp"], type=offer_params["type"])
@@ -103,17 +123,19 @@ async def offer(request):
103123

104124
pcs.add(pc)
105125

106-
tracks = {"video": None}
126+
tracks = {"video": None, "audio": None}
107127

108-
# Prefer h264
109-
transceiver = pc.addTransceiver("video")
110-
caps = RTCRtpSender.getCapabilities("video")
111-
prefs = list(filter(lambda x: x.name == "H264", caps.codecs))
112-
transceiver.setCodecPreferences(prefs)
128+
# Only add video transceiver if video is present in the offer
129+
if "m=video" in offer.sdp:
130+
# Prefer h264
131+
transceiver = pc.addTransceiver("video")
132+
caps = RTCRtpSender.getCapabilities("video")
133+
prefs = list(filter(lambda x: x.name == "H264", caps.codecs))
134+
transceiver.setCodecPreferences(prefs)
113135

114-
# Monkey patch max and min bitrate to ensure constant bitrate
115-
h264.MAX_BITRATE = MAX_BITRATE
116-
h264.MIN_BITRATE = MIN_BITRATE
136+
# Monkey patch max and min bitrate to ensure constant bitrate
137+
h264.MAX_BITRATE = MAX_BITRATE
138+
h264.MIN_BITRATE = MIN_BITRATE
117139

118140
# Handle control channel from client
119141
@pc.on("datachannel")
@@ -131,13 +153,13 @@ async def on_message(message):
131153
"nodes": nodes_info
132154
}
133155
channel.send(json.dumps(response))
134-
elif params.get("type") == "update_prompt":
135-
if "prompt" not in params:
156+
elif params.get("type") == "update_prompts":
157+
if "prompts" not in params:
136158
logger.warning("[Control] Missing prompt in update_prompt message")
137159
return
138-
pipeline.set_prompt(params["prompt"])
160+
await pipeline.update_prompts(params["prompts"])
139161
response = {
140-
"type": "prompt_updated",
162+
"type": "prompts_updated",
141163
"success": True
142164
}
143165
channel.send(json.dumps(response))
@@ -158,6 +180,10 @@ def on_track(track):
158180

159181
codec = "video/H264"
160182
force_codec(pc, sender, codec)
183+
elif track.kind == "audio":
184+
audioTrack = AudioStreamTrack(track, pipeline)
185+
tracks["audio"] = audioTrack
186+
pc.addTrack(audioTrack)
161187

162188
@track.on("ended")
163189
async def on_ended():
@@ -175,6 +201,11 @@ async def on_connectionstatechange():
175201

176202
await pc.setRemoteDescription(offer)
177203

204+
if "m=audio" in pc.remoteDescription.sdp:
205+
await pipeline.warm_audio()
206+
if "m=video" in pc.remoteDescription.sdp:
207+
await pipeline.warm_video()
208+
178209
answer = await pc.createAnswer()
179210
await pc.setLocalDescription(answer)
180211

@@ -190,7 +221,7 @@ async def set_prompt(request):
190221
pipeline = request.app["pipeline"]
191222

192223
prompt = await request.json()
193-
pipeline.set_prompt(prompt)
224+
await pipeline.set_prompts(prompt)
194225

195226
return web.Response(content_type="application/json", text="OK")
196227

0 commit comments

Comments
 (0)