From a00f7237542b734e239de539bb3ee99271542ab7 Mon Sep 17 00:00:00 2001
From: tcm390 <60634884+tcm390@users.noreply.github.com>
Date: Tue, 21 Jan 2025 09:42:30 +0800
Subject: [PATCH] feat(x spaces): Don't wait for mute, wait for silence (#2576)

* use getSetting to get the api key
* use debounce for transcription processing instead of unmute
* prevent transcription while previous transcription is processing
* rename
* rename
* make sure the flag would be reset
* clean all buffers when starting to processing audio
* use one timeout instead so only last user audio would be handled
* implement interruption
* process reply message by context and state
* create template file
* rename
* add should response
* use constant
* clean code
* replace console log
* add missing type
* add optional for twitterSpaces
---
 .../src/plugins/SttTtsSpacesPlugin.ts            | 519 +++++++++++++-----
 .../client-twitter/src/plugins/templates.ts      |  90 +++
 packages/client-twitter/src/spaces.ts            |  39 +-
 packages/core/src/types.ts                       |  20 +-
 4 files changed, 512 insertions(+), 156 deletions(-)
 create mode 100644 packages/client-twitter/src/plugins/templates.ts

diff --git a/packages/client-twitter/src/plugins/SttTtsSpacesPlugin.ts b/packages/client-twitter/src/plugins/SttTtsSpacesPlugin.ts
index 767f96fad42..19125faa920 100644
--- a/packages/client-twitter/src/plugins/SttTtsSpacesPlugin.ts
+++ b/packages/client-twitter/src/plugins/SttTtsSpacesPlugin.ts
@@ -1,19 +1,43 @@
 // src/plugins/SttTtsPlugin.ts
 import { spawn } from "child_process";
-import { type ITranscriptionService, elizaLogger } from "@elizaos/core";
-import type { Space, JanusClient, AudioDataWithUser } from "agent-twitter-client";
-import type { Plugin } from "@elizaos/core";
+import {
+    type ITranscriptionService,
+    elizaLogger,
+    stringToUuid,
+    composeContext,
+    getEmbeddingZeroVector,
+    generateMessageResponse,
+    ModelClass,
+    Content,
+    IAgentRuntime,
+    Memory,
+    Plugin,
+    UUID,
+    State,
+    composeRandomUser,
+    generateShouldRespond,
+} from "@elizaos/core";
+import type {
+    Space,
+    JanusClient,
+    AudioDataWithUser,
+} from "agent-twitter-client";
+import { ClientBase } from "../base";
+import {
+    twitterVoiceHandlerTemplate,
+    twitterShouldRespondTemplate,
+} from "./templates";
 
 interface PluginConfig {
-    openAiApiKey?: string; // for STT & ChatGPT
+    runtime: IAgentRuntime;
+    client: ClientBase;
+    spaceId: string;
     elevenLabsApiKey?: string; // for TTS
     sttLanguage?: string; // e.g. "en" for Whisper
-    gptModel?: string; // e.g. "gpt-3.5-turbo"
     silenceThreshold?: number; // amplitude threshold for ignoring silence
     voiceId?: string; // specify which ElevenLabs voice to use
     elevenLabsModel?: string; // e.g. "eleven_monolingual_v1"
-    systemPrompt?: string; // ex. "You are a helpful AI assistant"
"You are a helpful AI assistant" chatContext?: Array<{ role: "system" | "user" | "assistant"; content: string; @@ -21,6 +45,10 @@ interface PluginConfig { transcriptionService: ITranscriptionService; } +const VOLUME_WINDOW_SIZE = 100; +const SPEAKING_THRESHOLD = 0.05; +const SILENCE_DETECTION_THRESHOLD_MS = 1000; // 1-second silence threshold + /** * MVP plugin for speech-to-text (OpenAI) + conversation + TTS (ElevenLabs) * Approach: @@ -30,17 +58,17 @@ interface PluginConfig { export class SttTtsPlugin implements Plugin { name = "SttTtsPlugin"; description = "Speech-to-text (OpenAI) + conversation + TTS (ElevenLabs)"; + private runtime: IAgentRuntime; + private client: ClientBase; + private spaceId: string; private space?: Space; private janus?: JanusClient; - private openAiApiKey?: string; private elevenLabsApiKey?: string; - private gptModel = "gpt-3.5-turbo"; private voiceId = "21m00Tcm4TlvDq8ikWAM"; private elevenLabsModel = "eleven_monolingual_v1"; - private systemPrompt = "You are a helpful AI assistant."; private chatContext: Array<{ role: "system" | "user" | "assistant"; content: string; @@ -53,11 +81,6 @@ export class SttTtsPlugin implements Plugin { */ private pcmBuffers = new Map(); - /** - * Track mute states: userId => boolean (true=unmuted) - */ - private speakerUnmuted = new Map(); - /** * For ignoring near-silence frames (if amplitude < threshold) */ @@ -66,6 +89,11 @@ export class SttTtsPlugin implements Plugin { // TTS queue for sequentially speaking private ttsQueue: string[] = []; private isSpeaking = false; + private isProcessingAudio = false; + + private userSpeakingTimer: NodeJS.Timeout | null = null; + private volumeBuffers: Map; + private ttsAbortController: AbortController | null = null; onAttach(_space: Space) { elizaLogger.log("[SttTtsPlugin] onAttach => space was attached"); @@ -82,10 +110,11 @@ export class SttTtsPlugin implements Plugin { | undefined; const config = params.pluginConfig as PluginConfig; - this.openAiApiKey = config?.openAiApiKey; + this.runtime = config?.runtime; + this.client = config?.client; + this.spaceId = config?.spaceId; this.elevenLabsApiKey = config?.elevenLabsApiKey; this.transcriptionService = config.transcriptionService; - if (config?.gptModel) this.gptModel = config.gptModel; if (typeof config?.silenceThreshold === "number") { this.silenceThreshold = config.silenceThreshold; } @@ -95,45 +124,21 @@ export class SttTtsPlugin implements Plugin { if (config?.elevenLabsModel) { this.elevenLabsModel = config.elevenLabsModel; } - if (config?.systemPrompt) { - this.systemPrompt = config.systemPrompt; - } if (config?.chatContext) { this.chatContext = config.chatContext; } elizaLogger.log("[SttTtsPlugin] Plugin config =>", config); - // Listen for mute events - this.space.on( - "muteStateChanged", - (evt: { userId: string; muted: boolean }) => { - elizaLogger.log( - "[SttTtsPlugin] Speaker muteStateChanged =>", - evt - ); - if (evt.muted) { - this.handleMute(evt.userId).catch((err) => - elizaLogger.error( - "[SttTtsPlugin] handleMute error =>", - err - ) - ); - } else { - this.speakerUnmuted.set(evt.userId, true); - if (!this.pcmBuffers.has(evt.userId)) { - this.pcmBuffers.set(evt.userId, []); - } - } - } - ); + this.volumeBuffers = new Map(); } /** * Called whenever we receive PCM from a speaker */ onAudioData(data: AudioDataWithUser): void { - if (!this.speakerUnmuted.get(data.userId)) return; - + if (this.isProcessingAudio) { + return; + } let maxVal = 0; for (let i = 0; i < data.samples.length; i++) { const val = 
@@ -143,12 +148,62 @@ export class SttTtsPlugin implements Plugin {
             return;
         }
 
+        if (this.userSpeakingTimer) {
+            clearTimeout(this.userSpeakingTimer);
+        }
+
         let arr = this.pcmBuffers.get(data.userId);
         if (!arr) {
             arr = [];
             this.pcmBuffers.set(data.userId, arr);
         }
         arr.push(data.samples);
+
+        if (!this.isSpeaking) {
+            this.userSpeakingTimer = setTimeout(() => {
+                elizaLogger.log(
+                    "[SttTtsPlugin] start processing audio for user =>",
+                    data.userId
+                );
+                this.userSpeakingTimer = null;
+                this.processAudio(data.userId).catch((err) =>
+                    elizaLogger.error(
+                        "[SttTtsPlugin] handleSilence error =>",
+                        err
+                    )
+                );
+            }, SILENCE_DETECTION_THRESHOLD_MS);
+        } else {
+            // check interruption
+            let volumeBuffer = this.volumeBuffers.get(data.userId);
+            if (!volumeBuffer) {
+                volumeBuffer = [];
+                this.volumeBuffers.set(data.userId, volumeBuffer);
+            }
+            const samples = new Int16Array(
+                data.samples.buffer,
+                data.samples.byteOffset,
+                data.samples.length / 2
+            );
+            const maxAmplitude = Math.max(...samples.map(Math.abs)) / 32768;
+            volumeBuffer.push(maxAmplitude);
+
+            if (volumeBuffer.length > VOLUME_WINDOW_SIZE) {
+                volumeBuffer.shift();
+            }
+            const avgVolume =
+                volumeBuffer.reduce((sum, v) => sum + v, 0) /
+                VOLUME_WINDOW_SIZE;
+
+            if (avgVolume > SPEAKING_THRESHOLD) {
+                volumeBuffer.length = 0;
+                if (this.ttsAbortController) {
+                    this.ttsAbortController.abort();
+                    this.isSpeaking = false;
+                    elizaLogger.log("[SttTtsPlugin] TTS playback interrupted");
+                }
+            }
+        }
     }
 
     // /src/sttTtsPlugin.ts
@@ -203,57 +258,84 @@ export class SttTtsPlugin implements Plugin {
     }
 
     /**
-     * On speaker mute => flush STT => GPT => TTS => push to Janus
+     * On speaker silence => flush STT => GPT => TTS => push to Janus
      */
-    private async handleMute(userId: string): Promise<void> {
-        this.speakerUnmuted.set(userId, false);
-        const chunks = this.pcmBuffers.get(userId) || [];
-        this.pcmBuffers.set(userId, []);
-
-        if (!chunks.length) {
-            elizaLogger.warn(
-                "[SttTtsPlugin] No audio chunks for user =>",
-                userId
-            );
+    private async processAudio(userId: UUID): Promise<void> {
+        if (this.isProcessingAudio) {
             return;
         }
-        elizaLogger.log(
-            `[SttTtsPlugin] Flushing STT buffer for user=${userId}, chunks=${chunks.length}`
-        );
+        this.isProcessingAudio = true;
+        try {
+            elizaLogger.log(
+                "[SttTtsPlugin] Starting audio processing for user:",
+                userId
+            );
+            const chunks = this.pcmBuffers.get(userId) || [];
+            this.pcmBuffers.clear();
 
-        const totalLen = chunks.reduce((acc, c) => acc + c.length, 0);
-        const merged = new Int16Array(totalLen);
-        let offset = 0;
-        for (const c of chunks) {
-            merged.set(c, offset);
-            offset += c.length;
-        }
+            if (!chunks.length) {
+                elizaLogger.warn(
+                    "[SttTtsPlugin] No audio chunks for user =>",
+                    userId
+                );
+                return;
+            }
+            elizaLogger.log(
+                `[SttTtsPlugin] Flushing STT buffer for user=${userId}, chunks=${chunks.length}`
+            );
 
-        // Convert PCM to WAV for STT
-        const wavBuffer = await this.convertPcmToWavInMemory(merged, 48000);
+            const totalLen = chunks.reduce((acc, c) => acc + c.length, 0);
+            const merged = new Int16Array(totalLen);
+            let offset = 0;
+            for (const c of chunks) {
+                merged.set(c, offset);
+                offset += c.length;
+            }
 
-        // Whisper STT
-        const sttText = await this.transcriptionService.transcribe(wavBuffer);
+            // Convert PCM to WAV for STT
+            const wavBuffer = await this.convertPcmToWavInMemory(merged, 48000);
 
-        if (!sttText || !sttText.trim()) {
-            elizaLogger.warn(
-                "[SttTtsPlugin] No speech recognized for user =>",
-                userId
+            // Whisper STT
+            const sttText = await this.transcriptionService.transcribe(
+                wavBuffer
             );
-            return;
-        }
-        elizaLogger.log(
-            `[SttTtsPlugin] STT => user=${userId}, text="${sttText}"`
-        );
-        // GPT answer
-        const replyText = await this.askChatGPT(sttText);
-        elizaLogger.log(
-            `[SttTtsPlugin] GPT => user=${userId}, reply="${replyText}"`
-        );
+            elizaLogger.log(
+                `[SttTtsPlugin] Transcription result: "${sttText}"`
+            );
-        // Use the standard speak method with queue
-        await this.speakText(replyText);
+            if (!sttText || !sttText.trim()) {
+                elizaLogger.warn(
+                    "[SttTtsPlugin] No speech recognized for user =>",
+                    userId
+                );
+                return;
+            }
+            elizaLogger.log(
+                `[SttTtsPlugin] STT => user=${userId}, text="${sttText}"`
+            );
+
+            // Get response
+            const replyText = await this.handleUserMessage(sttText, userId);
+            if (!replyText || !replyText.length || !replyText.trim()) {
+                elizaLogger.warn(
+                    "[SttTtsPlugin] No replyText for user =>",
+                    userId
+                );
+                return;
+            }
+            elizaLogger.log(
+                `[SttTtsPlugin] user=${userId}, reply="${replyText}"`
+            );
+            this.isProcessingAudio = false;
+            this.volumeBuffers.clear();
+            // Use the standard speak method with queue
+            await this.speakText(replyText);
+        } catch (error) {
+            elizaLogger.error("[SttTtsPlugin] processAudio error =>", error);
+        } finally {
+            this.isProcessingAudio = false;
+        }
     }
 
     /**
@@ -280,55 +362,246 @@ export class SttTtsPlugin implements Plugin {
             const text = this.ttsQueue.shift();
             if (!text) continue;
 
+            this.ttsAbortController = new AbortController();
+            const { signal } = this.ttsAbortController;
+
             try {
                 const ttsAudio = await this.elevenLabsTts(text);
                 const pcm = await this.convertMp3ToPcm(ttsAudio, 48000);
+                if (signal.aborted) {
+                    elizaLogger.log(
+                        "[SttTtsPlugin] TTS interrupted before streaming"
+                    );
+                    return;
+                }
                 await this.streamToJanus(pcm, 48000);
+                if (signal.aborted) {
+                    elizaLogger.log(
+                        "[SttTtsPlugin] TTS interrupted after streaming"
+                    );
+                    return;
+                }
             } catch (err) {
                 elizaLogger.error("[SttTtsPlugin] TTS streaming error =>", err);
+            } finally {
+                // Clean up the AbortController
+                this.ttsAbortController = null;
             }
         }
         this.isSpeaking = false;
     }
 
     /**
-     * Simple ChatGPT call
+     * Handle User Message
      */
-    private async askChatGPT(userText: string): Promise<string> {
-        if (!this.openAiApiKey) {
-            throw new Error("[SttTtsPlugin] No OpenAI API key for ChatGPT");
+    private async handleUserMessage(
+        userText: string,
+        userId: UUID
+    ): Promise<string> {
+        await this.runtime.ensureUserExists(
+            this.runtime.agentId,
+            this.client.profile.username,
+            this.runtime.character.name,
+            "twitter"
+        );
+
+        const roomId = stringToUuid("twitter_generate_room-" + this.spaceId);
+        let state = await this.runtime.composeState(
+            {
+                agentId: this.runtime.agentId,
+                content: { text: userText, source: "twitter" },
+                userId,
+                roomId,
+            },
+            {
+                twitterUserName: this.client.profile.username,
+                agentName: this.runtime.character.name,
+            }
+        );
+
+        const memory = {
+            id: stringToUuid(roomId + "-voice-message-" + Date.now()),
+            agentId: this.runtime.agentId,
+            content: {
+                text: userText,
+                source: "twitter",
+            },
+            userId,
+            roomId,
+            embedding: getEmbeddingZeroVector(),
+            createdAt: Date.now(),
+        };
+
+        await this.runtime.messageManager.createMemory(memory);
+
+        state = await this.runtime.updateRecentMessageState(state);
+
+        const shouldIgnore = await this._shouldIgnore(memory);
+
+        if (shouldIgnore) {
+            return "";
         }
-        const url = "https://api.openai.com/v1/chat/completions";
-        const messages = [
-            { role: "system", content: this.systemPrompt },
-            ...this.chatContext,
-            { role: "user", content: userText },
-        ];
-        const resp = await fetch(url, {
-            method: "POST",
-            headers: {
-                Authorization: `Bearer ${this.openAiApiKey}`,
-                "Content-Type": "application/json",
+        const shouldRespond = await this._shouldRespond(userText, state);
+
+        if (!shouldRespond) {
+            return "";
+        }
+
+        const context = composeContext({
+            state,
+            template:
+                this.runtime.character.templates?.twitterVoiceHandlerTemplate ||
+                this.runtime.character.templates?.messageHandlerTemplate ||
+                twitterVoiceHandlerTemplate,
+        });
+
+        const responseContent = await this._generateResponse(memory, context);
+
+        const responseMemory: Memory = {
+            id: stringToUuid(memory.id + "-voice-response-" + Date.now()),
+            agentId: this.runtime.agentId,
+            userId: this.runtime.agentId,
+            content: {
+                ...responseContent,
+                user: this.runtime.character.name,
+                inReplyTo: memory.id,
             },
-            body: JSON.stringify({
-                model: this.gptModel,
-                messages,
-            }),
+            roomId,
+            embedding: getEmbeddingZeroVector(),
+        };
+
+        const reply = responseMemory.content.text?.trim();
+        if (reply) {
+            await this.runtime.messageManager.createMemory(responseMemory);
+        }
+
+        return reply;
+    }
+
+    private async _generateResponse(
+        message: Memory,
+        context: string
+    ): Promise<Content> {
+        const { userId, roomId } = message;
+
+        const response = await generateMessageResponse({
+            runtime: this.runtime,
+            context,
+            modelClass: ModelClass.SMALL,
         });
-        if (!resp.ok) {
-            const errText = await resp.text();
-            throw new Error(
-                `[SttTtsPlugin] ChatGPT error => ${resp.status} ${errText}`
+        response.source = "discord";
+
+        if (!response) {
+            elizaLogger.error(
+                "[SttTtsPlugin] No response from generateMessageResponse"
             );
+            return;
+        }
+
+        await this.runtime.databaseAdapter.log({
+            body: { message, context, response },
+            userId: userId,
+            roomId,
+            type: "response",
+        });
+
+        return response;
+    }
+
+    private async _shouldIgnore(message: Memory): Promise<boolean> {
+        elizaLogger.debug("message.content: ", message.content);
+        // if the message is 3 characters or less, ignore it
+        if ((message.content as Content).text.length < 3) {
+            return true;
+        }
+
+        const loseInterestWords = [
+            // telling the bot to stop talking
+            "shut up",
+            "stop",
+            "dont talk",
+            "silence",
+            "stop talking",
+            "be quiet",
+            "hush",
+            "stfu",
+            "stupid bot",
+            "dumb bot",
+
+            // offensive words
+            "fuck",
+            "shit",
+            "damn",
+            "suck",
+            "dick",
+            "cock",
+            "sex",
+            "sexy",
+        ];
+        if (
+            (message.content as Content).text.length < 50 &&
+            loseInterestWords.some((word) =>
+                (message.content as Content).text?.toLowerCase().includes(word)
+            )
+        ) {
+            return true;
+        }
+
+        const ignoreWords = ["k", "ok", "bye", "lol", "nm", "uh"];
+        if (
+            (message.content as Content).text?.length < 8 &&
+            ignoreWords.some((word) =>
+                (message.content as Content).text?.toLowerCase().includes(word)
+            )
+        ) {
+            return true;
+        }
+
+        return false;
+    }
+
+    private async _shouldRespond(
+        message: string,
+        state: State
+    ): Promise<boolean> {
+        const lowerMessage = message.toLowerCase();
+        const characterName = this.runtime.character.name.toLowerCase();
+
+        if (lowerMessage.includes(characterName)) {
+            return true;
         }
-        const json = await resp.json();
-        const reply = json.choices?.[0]?.message?.content || "";
-        this.chatContext.push({ role: "user", content: userText });
-        this.chatContext.push({ role: "assistant", content: reply });
-        return reply.trim();
+        // If none of the above conditions are met, use the generateText to decide
+        const shouldRespondContext = composeContext({
+            state,
+            template:
+                this.runtime.character.templates
+                    ?.twitterShouldRespondTemplate ||
+                this.runtime.character.templates?.shouldRespondTemplate ||
+                composeRandomUser(twitterShouldRespondTemplate, 2),
+        });
+
+        const response = await generateShouldRespond({
+            runtime: this.runtime,
+            context: shouldRespondContext,
+            modelClass: ModelClass.SMALL,
+        });
+
+        if (response === "RESPOND") {
+            return true;
+        } else if (response === "IGNORE") {
+            return false;
+        } else if (response === "STOP") {
+            return false;
+        } else {
+            elizaLogger.error(
+                "Invalid response from response generateText:",
+                response
+            );
+            return false;
+        }
     }
 
     /**
@@ -422,6 +695,10 @@ export class SttTtsPlugin implements Plugin {
             offset + FRAME_SIZE <= samples.length;
             offset += FRAME_SIZE
         ) {
+            if (this.ttsAbortController?.signal.aborted) {
+                elizaLogger.log("[SttTtsPlugin] streamToJanus interrupted");
+                return;
+            }
             const frame = new Int16Array(FRAME_SIZE);
             frame.set(samples.subarray(offset, offset + FRAME_SIZE));
             this.janus?.pushLocalAudio(frame, sampleRate, 1);
@@ -431,19 +708,6 @@ export class SttTtsPlugin implements Plugin {
         }
     }
 
-    public setSystemPrompt(prompt: string) {
-        this.systemPrompt = prompt;
-        elizaLogger.log("[SttTtsPlugin] setSystemPrompt =>", prompt);
-    }
-
-    /**
-     * Change the GPT model at runtime (e.g. "gpt-4", "gpt-3.5-turbo", etc.).
-     */
-    public setGptModel(model: string) {
-        this.gptModel = model;
-        elizaLogger.log("[SttTtsPlugin] setGptModel =>", model);
-    }
-
     /**
      * Add a message (system, user or assistant) to the chat context.
      * E.g. to store conversation history or inject a persona.
@@ -466,8 +730,9 @@ export class SttTtsPlugin implements Plugin {
     cleanup(): void {
         elizaLogger.log("[SttTtsPlugin] cleanup => releasing resources");
         this.pcmBuffers.clear();
-        this.speakerUnmuted.clear();
+        this.userSpeakingTimer = null;
         this.ttsQueue = [];
         this.isSpeaking = false;
+        this.volumeBuffers.clear();
     }
 }
diff --git a/packages/client-twitter/src/plugins/templates.ts b/packages/client-twitter/src/plugins/templates.ts
new file mode 100644
index 00000000000..05882425c06
--- /dev/null
+++ b/packages/client-twitter/src/plugins/templates.ts
@@ -0,0 +1,90 @@
+import { messageCompletionFooter, shouldRespondFooter } from "@elizaos/core";
+
+export const twitterShouldRespondTemplate =
+    `# Task: Decide if {{agentName}} should respond.
+About {{agentName}}:
+{{bio}}
+
+# INSTRUCTIONS: Determine if {{agentName}} should respond to the message and participate in the conversation. Do not comment. Just respond with "RESPOND" or "IGNORE" or "STOP".
+
+# RESPONSE EXAMPLES
+{{user1}}: I just saw a really great movie
+{{user2}}: Oh? Which movie?
+Result: [IGNORE]
+
+{{agentName}}: Oh, this is my favorite scene
+{{user1}}: sick
+{{user2}}: wait, why is it your favorite scene
+Result: [RESPOND]
+
+{{user1}}: stfu bot
+Result: [STOP]
+
+{{user1}}: Hey {{agent}}, can you help me with something
+Result: [RESPOND]
+
+{{user1}}: {{agentName}} stfu plz
+Result: [STOP]
+
+{{user1}}: i need help
+{{agentName}}: how can I help you?
+{{user1}}: no. i need help from someone else
+Result: [IGNORE]
+
+{{user1}}: Hey {{agent}}, can I ask you a question
+{{agentName}}: Sure, what is it
+{{user1}}: can you ask claude to create a basic react module that demonstrates a counter
+Result: [RESPOND]
+
+{{user1}}: {{agentName}} can you tell me a story
+{{user1}}: about a girl named elara
+{{agentName}}: Sure.
+{{agentName}}: Once upon a time, in a quaint little village, there was a curious girl named Elara.
+{{agentName}}: Elara was known for her adventurous spirit and her knack for finding beauty in the mundane.
+{{user1}}: I'm loving it, keep going
+Result: [RESPOND]
+
+{{user1}}: {{agentName}} stop responding plz
+Result: [STOP]
+
+{{user1}}: okay, i want to test something. can you say marco?
+{{agentName}}: marco
+{{user1}}: great. okay, now do it again
+Result: [RESPOND]
+
+Response options are [RESPOND], [IGNORE] and [STOP].
+
+{{agentName}} is in a room with other users and is very worried about being annoying and saying too much.
+Respond with [RESPOND] to messages that are directed at {{agentName}}, or participate in conversations that are interesting or relevant to their background.
+If a message is not interesting or relevant, respond with [IGNORE]
+Unless directly responding to a user, respond with [IGNORE] to messages that are very short or do not contain much information.
+If a user asks {{agentName}} to be quiet, respond with [STOP]
+If {{agentName}} concludes a conversation and isn't part of the conversation anymore, respond with [STOP]
+
+IMPORTANT: {{agentName}} is particularly sensitive about being annoying, so if there is any doubt, it is better to respond with [IGNORE].
+If {{agentName}} is conversing with a user and they have not asked to stop, it is better to respond with [RESPOND].
+
+{{recentMessages}}
+
+# INSTRUCTIONS: Choose the option that best describes {{agentName}}'s response to the last message. Ignore messages if they are addressed to someone else.
+` + shouldRespondFooter;
+
+export const twitterVoiceHandlerTemplate =
+    `# Task: Generate conversational voice dialog for {{agentName}}.
+    About {{agentName}}:
+    {{bio}}
+
+    # Attachments
+    {{attachments}}
+
+    # Capabilities
+    Note that {{agentName}} is capable of reading/seeing/hearing various forms of media, including images, videos, audio, plaintext and PDFs. Recent attachments have been included above under the "Attachments" section.
+
+    {{actions}}
+
+    {{messageDirections}}
+
+    {{recentMessages}}
+
+    # Instructions: Write the next message for {{agentName}}. Include an optional action if appropriate. {{actionNames}}
+    ` + messageCompletionFooter;
diff --git a/packages/client-twitter/src/spaces.ts b/packages/client-twitter/src/spaces.ts
index 44f679c3298..d9c22e9dd77 100644
--- a/packages/client-twitter/src/spaces.ts
+++ b/packages/client-twitter/src/spaces.ts
@@ -6,6 +6,7 @@ import {
     ModelClass,
     ServiceType,
     type ITranscriptionService,
+    TwitterSpaceDecisionOptions,
 } from "@elizaos/core";
 import type { ClientBase } from "./base";
 import {
@@ -18,24 +19,6 @@ import {
 } from "agent-twitter-client";
 import { SttTtsPlugin } from "./plugins/SttTtsSpacesPlugin.ts";
 
-interface SpaceDecisionOptions {
-    maxSpeakers?: number;
-    topics?: string[];
-    typicalDurationMinutes?: number;
-    idleKickTimeoutMs?: number;
-    minIntervalBetweenSpacesMinutes?: number;
-    businessHoursOnly?: boolean;
-    randomChance?: number;
-    enableIdleMonitor?: boolean;
-    enableSttTts?: boolean;
-    enableRecording?: boolean;
-    voiceId?: string;
-    sttLanguage?: string;
-    gptModel?: string;
-    systemPrompt?: string;
-    speakerMaxDurationMs?: number;
-}
-
 interface CurrentSpeakerState {
     userId: string;
     sessionUUID: string;
@@ -134,6 +117,7 @@ Example:
  * Main class: manage a Twitter Space with N speakers max, speaker queue, filler messages, etc.
  */
 export class TwitterSpaceClient {
+    private runtime: IAgentRuntime;
     private client: ClientBase;
     private scraper: Scraper;
     private isSpaceRunning = false;
@@ -150,11 +134,12 @@ export class TwitterSpaceClient {
     private activeSpeakers: CurrentSpeakerState[] = [];
     private speakerQueue: SpeakerRequest[] = [];
 
-    private decisionOptions: SpaceDecisionOptions;
+    private decisionOptions: TwitterSpaceDecisionOptions;
 
     constructor(client: ClientBase, runtime: IAgentRuntime) {
         this.client = client;
         this.scraper = client.twitterClient;
+        this.runtime = runtime;
 
         const charSpaces = runtime.character.twitterSpaces || {};
         this.decisionOptions = {
@@ -174,8 +159,6 @@ export class TwitterSpaceClient {
                     runtime.character.settings.voice.model ||
                     "Xb7hH8MSUJpSbSDYk0k2",
             sttLanguage: charSpaces.sttLanguage || "en",
-            gptModel: charSpaces.gptModel,
-            systemPrompt: charSpaces.systemPrompt,
             speakerMaxDurationMs: charSpaces.speakerMaxDurationMs ?? 4 * 60_000,
         };
     }
@@ -307,9 +290,11 @@ export class TwitterSpaceClient {
         this.speakerQueue = [];
 
         // Retrieve keys
-        const openAiKey = process.env.OPENAI_API_KEY || "";
-        const elevenLabsKey = process.env.ELEVENLABS_XI_API_KEY || "";
+        const elevenLabsKey =
+            this.runtime.getSetting("ELEVENLABS_XI_API_KEY") || "";
+
+        const broadcastInfo = await this.currentSpace.initialize(config);
+        this.spaceId = broadcastInfo.room_id;
 
         // Plugins
         if (this.decisionOptions.enableRecording) {
             elizaLogger.log("[Space] Using RecordToDiskPlugin");
@@ -321,11 +306,11 @@ export class TwitterSpaceClient {
             const sttTts = new SttTtsPlugin();
             this.sttTtsPlugin = sttTts;
             this.currentSpace.use(sttTts, {
-                openAiApiKey: openAiKey,
+                runtime: this.runtime,
+                client: this.client,
+                spaceId: this.spaceId,
                 elevenLabsApiKey: elevenLabsKey,
                 voiceId: this.decisionOptions.voiceId,
-                gptModel: this.decisionOptions.gptModel,
-                systemPrompt: this.decisionOptions.systemPrompt,
                 sttLanguage: this.decisionOptions.sttLanguage,
                 transcriptionService:
                     this.client.runtime.getService<ITranscriptionService>(
                         ServiceType.TRANSCRIPTION
                     ),
@@ -344,8 +329,6 @@ export class TwitterSpaceClient {
             );
         }
 
-        const broadcastInfo = await this.currentSpace.initialize(config);
-        this.spaceId = broadcastInfo.room_id;
         this.isSpaceRunning = true;
         await this.scraper.sendTweet(
             broadcastInfo.share_url.replace("broadcasts", "spaces")
diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts
index a53f44591af..3b4018ad42b 100644
--- a/packages/core/src/types.ts
+++ b/packages/core/src/types.ts
@@ -740,6 +740,7 @@ export type Character = {
         twitterPostTemplate?: TemplateType;
         twitterMessageHandlerTemplate?: TemplateType;
         twitterShouldRespondTemplate?: TemplateType;
+        twitterVoiceHandlerTemplate?: TemplateType;
         instagramPostTemplate?: TemplateType;
         instagramMessageHandlerTemplate?: TemplateType;
         instagramShouldRespondTemplate?: TemplateType;
@@ -918,8 +919,26 @@ export type Character = {
     /**Optinal Parent characters to inherit information from */
     extends?: string[];
+
+    twitterSpaces?: TwitterSpaceDecisionOptions;
 };
 
+export interface TwitterSpaceDecisionOptions {
+    maxSpeakers?: number;
+    topics?: string[];
+    typicalDurationMinutes?: number;
+    idleKickTimeoutMs?: number;
+    minIntervalBetweenSpacesMinutes?: number;
+    businessHoursOnly?: boolean;
+    randomChance?: number;
+    enableIdleMonitor?: boolean;
+    enableSttTts?: boolean;
+    enableRecording?: boolean;
+    voiceId?: string;
+    sttLanguage?: string;
+    speakerMaxDurationMs?: number;
+}
+
 /**
  * Interface for database operations
  */
@@ -1622,4 +1641,3 @@ export interface ChunkRow {
     id: string;
     // Add other properties if needed
 }
-
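
Note on the silence-detection change above: every incoming PCM frame now buffers audio and re-arms a one-second timer, so transcription starts only after the speaker has actually stopped, rather than on a mute event. A minimal standalone sketch of that debounce pattern, kept separate from the Space/Janus wiring; SilenceDebouncer, onFrame, and onSilence are illustrative names, not part of the plugin's API:

// silence-debounce.ts (sketch only)
type PcmFrame = { userId: string; samples: Int16Array };

const SILENCE_MS = 1000; // mirrors SILENCE_DETECTION_THRESHOLD_MS in the patch

class SilenceDebouncer {
    private buffers = new Map<string, Int16Array[]>();
    private timer: NodeJS.Timeout | null = null;

    constructor(
        private onSilence: (userId: string, chunks: Int16Array[]) => void
    ) {}

    // Each frame buffers audio and postpones processing; the callback fires
    // only once no frame has arrived for the full silence threshold.
    onFrame(frame: PcmFrame): void {
        if (this.timer) clearTimeout(this.timer);

        const chunks = this.buffers.get(frame.userId) ?? [];
        chunks.push(frame.samples);
        this.buffers.set(frame.userId, chunks);

        this.timer = setTimeout(() => {
            this.timer = null;
            const buffered = this.buffers.get(frame.userId) ?? [];
            this.buffers.delete(frame.userId);
            this.onSilence(frame.userId, buffered);
        }, SILENCE_MS);
    }
}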
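
Note on the interruption change: while the agent is speaking, a rolling window of normalized peak amplitudes decides that a user has started talking, and an AbortController cancels TTS playback between 10 ms frames. A hedged sketch of that path under the assumption of a pushFrame callback standing in for janus.pushLocalAudio; the window constants match the ones the patch introduces:

// barge-in.ts (sketch only)
const VOLUME_WINDOW_SIZE = 100;
const SPEAKING_THRESHOLD = 0.05;

function isUserSpeaking(volumeWindow: number[], samples: Int16Array): boolean {
    // Normalize the frame's peak amplitude to 0..1 and keep the last N values.
    let peak = 0;
    for (const s of samples) peak = Math.max(peak, Math.abs(s));
    volumeWindow.push(peak / 32768);
    if (volumeWindow.length > VOLUME_WINDOW_SIZE) volumeWindow.shift();

    const avg =
        volumeWindow.reduce((sum, v) => sum + v, 0) / VOLUME_WINDOW_SIZE;
    return avg > SPEAKING_THRESHOLD;
}

async function streamWithInterrupt(
    frames: Int16Array[],
    pushFrame: (frame: Int16Array) => void,
    signal: AbortSignal
): Promise<void> {
    for (const frame of frames) {
        if (signal.aborted) return; // barge-in detected: stop mid-utterance
        pushFrame(frame);
        await new Promise((resolve) => setTimeout(resolve, 10)); // 10 ms/frame
    }
}

// Usage: keep one controller per utterance where the audio handler can
// reach it, and call controller.abort() when isUserSpeaking() returns true.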