feat: tts
DDMeaqua committed Sep 18, 2024
1 parent 212605a commit 3ae8ec1
Showing 19 changed files with 2 additions and 490 deletions.
11 changes: 0 additions & 11 deletions app/client/api.ts
@@ -64,16 +64,6 @@ export interface SpeechOptions {
onController?: (controller: AbortController) => void;
}

export interface TranscriptionOptions {
model?: "whisper-1";
file: Blob;
language?: string;
prompt?: string;
response_format?: "json" | "text" | "srt" | "verbose_json" | "vtt";
temperature?: number;
onController?: (controller: AbortController) => void;
}

export interface ChatOptions {
messages: RequestMessage[];
config: LLMConfig;
@@ -109,7 +99,6 @@ export interface LLMModelProvider {
export abstract class LLMApi {
abstract chat(options: ChatOptions): Promise<void>;
abstract speech(options: SpeechOptions): Promise<ArrayBuffer>;
abstract transcription(options: TranscriptionOptions): Promise<string>;
abstract usage(): Promise<LLMUsage>;
abstract models(): Promise<LLMModel[]>;
}
4 changes: 0 additions & 4 deletions app/client/platforms/alibaba.ts
@@ -13,7 +13,6 @@ import {
LLMApi,
LLMModel,
SpeechOptions,
TranscriptionOptions,
MultimodalContent,
} from "../api";
import Locale from "../../locales";
@@ -88,9 +87,6 @@ export class QwenApi implements LLMApi {
speech(options: SpeechOptions): Promise<ArrayBuffer> {
throw new Error("Method not implemented.");
}
transcription(options: TranscriptionOptions): Promise<string> {
throw new Error("Method not implemented.");
}

async chat(options: ChatOptions) {
const messages = options.messages.map((v) => ({
4 changes: 0 additions & 4 deletions app/client/platforms/anthropic.ts
@@ -5,7 +5,6 @@ import {
LLMApi,
MultimodalContent,
SpeechOptions,
TranscriptionOptions,
} from "../api";
import {
useAccessStore,
@@ -90,9 +89,6 @@ export class ClaudeApi implements LLMApi {
speech(options: SpeechOptions): Promise<ArrayBuffer> {
throw new Error("Method not implemented.");
}
transcription(options: TranscriptionOptions): Promise<string> {
throw new Error("Method not implemented.");
}

extractMessage(res: any) {
console.log("[Response] claude response: ", res);
4 changes: 0 additions & 4 deletions app/client/platforms/baidu.ts
@@ -15,7 +15,6 @@ import {
LLMModel,
MultimodalContent,
SpeechOptions,
TranscriptionOptions,
} from "../api";
import Locale from "../../locales";
import {
@@ -80,9 +79,6 @@ export class ErnieApi implements LLMApi {
speech(options: SpeechOptions): Promise<ArrayBuffer> {
throw new Error("Method not implemented.");
}
transcription(options: TranscriptionOptions): Promise<string> {
throw new Error("Method not implemented.");
}

async chat(options: ChatOptions) {
const messages = options.messages.map((v) => ({
4 changes: 0 additions & 4 deletions app/client/platforms/bytedance.ts
@@ -14,7 +14,6 @@ import {
LLMModel,
MultimodalContent,
SpeechOptions,
TranscriptionOptions,
} from "../api";
import Locale from "../../locales";
import {
@@ -82,9 +81,6 @@ export class DoubaoApi implements LLMApi {
speech(options: SpeechOptions): Promise<ArrayBuffer> {
throw new Error("Method not implemented.");
}
transcription(options: TranscriptionOptions): Promise<string> {
throw new Error("Method not implemented.");
}

async chat(options: ChatOptions) {
const messages = options.messages.map((v) => ({
5 changes: 1 addition & 4 deletions app/client/platforms/google.ts
@@ -6,7 +6,6 @@ import {
LLMModel,
LLMUsage,
SpeechOptions,
TranscriptionOptions,
} from "../api";
import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
import { getClientConfig } from "@/app/config/client";
@@ -67,9 +66,7 @@ export class GeminiProApi implements LLMApi {
speech(options: SpeechOptions): Promise<ArrayBuffer> {
throw new Error("Method not implemented.");
}
transcription(options: TranscriptionOptions): Promise<string> {
throw new Error("Method not implemented.");
}

async chat(options: ChatOptions): Promise<void> {
const apiClient = this;
let multimodal = false;
4 changes: 0 additions & 4 deletions app/client/platforms/iflytek.ts
@@ -13,7 +13,6 @@ import {
LLMApi,
LLMModel,
SpeechOptions,
TranscriptionOptions,
} from "../api";
import Locale from "../../locales";
import {
@@ -63,9 +62,6 @@ export class SparkApi implements LLMApi {
speech(options: SpeechOptions): Promise<ArrayBuffer> {
throw new Error("Method not implemented.");
}
transcription(options: TranscriptionOptions): Promise<string> {
throw new Error("Method not implemented.");
}

async chat(options: ChatOptions) {
const messages: ChatOptions["messages"] = [];
4 changes: 0 additions & 4 deletions app/client/platforms/moonshot.ts
@@ -27,7 +27,6 @@ import {
LLMUsage,
MultimodalContent,
SpeechOptions,
TranscriptionOptions,
} from "../api";
import Locale from "../../locales";
import {
@@ -77,9 +76,6 @@ export class MoonshotApi implements LLMApi {
speech(options: SpeechOptions): Promise<ArrayBuffer> {
throw new Error("Method not implemented.");
}
transcription(options: TranscriptionOptions): Promise<string> {
throw new Error("Method not implemented.");
}

async chat(options: ChatOptions) {
const messages: ChatOptions["messages"] = [];
42 changes: 0 additions & 42 deletions app/client/platforms/openai.ts
@@ -34,7 +34,6 @@ import {
LLMUsage,
MultimodalContent,
SpeechOptions,
TranscriptionOptions,
} from "../api";
import Locale from "../../locales";
import {
@@ -187,47 +186,6 @@ export class ChatGPTApi implements LLMApi {
}
}

async transcription(options: TranscriptionOptions): Promise<string> {
const formData = new FormData();
formData.append("file", options.file, "audio.wav");
formData.append("model", options.model ?? "whisper-1");
if (options.language) formData.append("language", options.language);
if (options.prompt) formData.append("prompt", options.prompt);
if (options.response_format)
formData.append("response_format", options.response_format);
if (options.temperature)
formData.append("temperature", options.temperature.toString());

console.log("[Request] openai audio transcriptions payload: ", options);

const controller = new AbortController();
options.onController?.(controller);

try {
const path = this.path(OpenaiPath.TranscriptionPath, options.model);
const headers = getHeaders(true);
const payload = {
method: "POST",
body: formData,
signal: controller.signal,
headers: headers,
};

// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
REQUEST_TIMEOUT_MS,
);
const res = await fetch(path, payload);
clearTimeout(requestTimeoutId);
const json = await res.json();
return json.text;
} catch (e) {
console.log("[Request] failed to make a audio transcriptions request", e);
throw e;
}
}

async chat(options: ChatOptions) {
const modelConfig = {
...useAppConfig.getState().modelConfig,
4 changes: 0 additions & 4 deletions app/client/platforms/tencent.ts
@@ -9,7 +9,6 @@ import {
LLMModel,
MultimodalContent,
SpeechOptions,
TranscriptionOptions,
} from "../api";
import Locale from "../../locales";
import {
@@ -94,9 +93,6 @@ export class HunyuanApi implements LLMApi {
speech(options: SpeechOptions): Promise<ArrayBuffer> {
throw new Error("Method not implemented.");
}
transcription(options: TranscriptionOptions): Promise<string> {
throw new Error("Method not implemented.");
}

async chat(options: ChatOptions) {
const visionModel = isVisionModel(options.config.model);
58 changes: 1 addition & 57 deletions app/components/chat.tsx
@@ -10,7 +10,6 @@ import React, {
} from "react";

import SendWhiteIcon from "../icons/send-white.svg";
import VoiceWhiteIcon from "../icons/voice-white.svg";
import BrainIcon from "../icons/brain.svg";
import RenameIcon from "../icons/rename.svg";
import ExportIcon from "../icons/share.svg";
@@ -83,7 +82,7 @@ import dynamic from "next/dynamic";
import { ChatControllerPool } from "../client/controller";
import { DalleSize, DalleQuality, DalleStyle } from "../typing";
import { Prompt, usePromptStore } from "../store/prompt";
import Locale, { getLang, getSTTLang } from "../locales";
import Locale from "../locales";

import { IconButton } from "./button";
import styles from "./chat.module.scss";
@@ -100,9 +99,7 @@ import {
import { useNavigate } from "react-router-dom";
import {
CHAT_PAGE_SIZE,
DEFAULT_STT_ENGINE,
DEFAULT_TTS_ENGINE,
FIREFOX_DEFAULT_STT_ENGINE,
ModelProvider,
LAST_INPUT_KEY,
Path,
@@ -123,11 +120,6 @@ import { MultimodalContent } from "../client/api";
const localStorage = safeLocalStorage();
import { ClientApi } from "../client/api";
import { createTTSPlayer } from "../utils/audio";
import {
OpenAITranscriptionApi,
SpeechApi,
WebTranscriptionApi,
} from "../utils/speech";
import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";

const ttsPlayer = createTTSPlayer();
@@ -556,44 +548,6 @@ export function ChatActions(props: {
}
}, [chatStore, currentModel, models]);

const [isListening, setIsListening] = useState(false);
const [isTranscription, setIsTranscription] = useState(false);
const [speechApi, setSpeechApi] = useState<any>(null);

useEffect(() => {
if (isFirefox()) config.sttConfig.engine = FIREFOX_DEFAULT_STT_ENGINE;
setSpeechApi(
config.sttConfig.engine === DEFAULT_STT_ENGINE
? new WebTranscriptionApi((transcription) =>
onRecognitionEnd(transcription),
)
: new OpenAITranscriptionApi((transcription) =>
onRecognitionEnd(transcription),
),
);
}, []);

const startListening = async () => {
if (speechApi) {
await speechApi.start();
setIsListening(true);
}
};
const stopListening = async () => {
if (speechApi) {
if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
setIsTranscription(true);
await speechApi.stop();
setIsListening(false);
}
};
const onRecognitionEnd = (finalTranscript: string) => {
console.log(finalTranscript);
if (finalTranscript) props.setUserInput(finalTranscript);
if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
setIsTranscription(false);
};

return (
<div className={styles["chat-input-actions"]}>
{couldStop && (
@@ -828,16 +782,6 @@
icon={<ShortcutkeyIcon />}
/>
)}

{config.sttConfig.enable && (
<ChatAction
onClick={async () =>
isListening ? await stopListening() : await startListening()
}
text={isListening ? Locale.Chat.StopSpeak : Locale.Chat.StartSpeak}
icon={<VoiceWhiteIcon />}
/>
)}
</div>
);
}
12 changes: 0 additions & 12 deletions app/components/settings.tsx
@@ -81,7 +81,6 @@ import { nanoid } from "nanoid";
import { useMaskStore } from "../store/mask";
import { ProviderType } from "../utils/cloud";
import { TTSConfigList } from "./tts-config";
import { STTConfigList } from "./stt-config";

function EditPromptModal(props: { id: string; onClose: () => void }) {
const promptStore = usePromptStore();
@@ -1659,17 +1658,6 @@ export function Settings() {
/>
</List>

<List>
<STTConfigList
sttConfig={config.sttConfig}
updateConfig={(updater) => {
const sttConfig = { ...config.sttConfig };
updater(sttConfig);
config.update((config) => (config.sttConfig = sttConfig));
}}
/>
</List>

<DangerItems />
</div>
</ErrorBoundary>