From 86f86962fb0725b888cee6ebd9eb9f818a0c9cee Mon Sep 17 00:00:00 2001
From: Shenghang Tsai
Date: Mon, 10 Feb 2025 13:37:48 +0800
Subject: [PATCH] Support VLM on SiliconFlow

---
 app/client/platforms/siliconflow.ts | 8 ++++++--
 app/constant.ts                     | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/app/client/platforms/siliconflow.ts b/app/client/platforms/siliconflow.ts
index 1ad316a6143..17650a9c69b 100644
--- a/app/client/platforms/siliconflow.ts
+++ b/app/client/platforms/siliconflow.ts
@@ -13,7 +13,7 @@ import {
   ChatMessageTool,
   usePluginStore,
 } from "@/app/store";
-import { streamWithThink } from "@/app/utils/chat";
+import { preProcessImageContent, streamWithThink } from "@/app/utils/chat";
 import {
   ChatOptions,
   getHeaders,
@@ -25,6 +25,7 @@ import { getClientConfig } from "@/app/config/client";
 import {
   getMessageTextContent,
   getMessageTextContentWithoutThinking,
+  isVisionModel,
 } from "@/app/utils";
 import { RequestPayload } from "./openai";
 import { fetch } from "@/app/utils/stream";
@@ -71,13 +72,16 @@ export class SiliconflowApi implements LLMApi {
   }
 
   async chat(options: ChatOptions) {
+    const visionModel = isVisionModel(options.config.model);
     const messages: ChatOptions["messages"] = [];
     for (const v of options.messages) {
       if (v.role === "assistant") {
         const content = getMessageTextContentWithoutThinking(v);
         messages.push({ role: v.role, content });
       } else {
-        const content = getMessageTextContent(v);
+        const content = visionModel
+          ? await preProcessImageContent(v.content)
+          : getMessageTextContent(v);
         messages.push({ role: v.role, content });
       }
     }
diff --git a/app/constant.ts b/app/constant.ts
index 09eec44b68d..d9cb62bf934 100644
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -462,6 +462,7 @@ export const VISION_MODEL_REGEXES = [
   /gpt-4-turbo(?!.*preview)/, // Matches "gpt-4-turbo" but not "gpt-4-turbo-preview"
   /^dall-e-3$/, // Matches exactly "dall-e-3"
   /glm-4v/,
+  /vl/i,
 ];
 
 export const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/];
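
Reviewer note: a minimal sketch of what the new /vl/i entry implies, assuming isVisionModel() tests the model id against VISION_MODEL_REGEXES and then drops anything matching EXCLUDE_VISION_MODEL_REGEXES. The helper name and model ids below are illustrative, not taken from this patch:

// Sketch only -- assumes this is how isVisionModel() consults the two regex lists.
const VISION_MODEL_REGEXES = [/glm-4v/, /vl/i];
const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/];

function isVisionModelSketch(model: string): boolean {
  // A model is treated as vision-capable when it matches at least one
  // inclusion regex and no exclusion regex.
  return (
    VISION_MODEL_REGEXES.some((r) => r.test(model)) &&
    !EXCLUDE_VISION_MODEL_REGEXES.some((r) => r.test(model))
  );
}

isVisionModelSketch("Qwen/Qwen2-VL-72B-Instruct"); // true  -- /vl/i matches "VL"
isVisionModelSketch("deepseek-ai/deepseek-vl2");   // true  -- /vl/i matches "vl"
isVisionModelSketch("deepseek-ai/DeepSeek-V3");    // false -- no regex matches

Because /vl/i matches the substring "vl" anywhere in the id, case-insensitively, it covers VL-family checkpoints without listing each one; the trade-off is that any future model id containing "vl" will also be routed through preProcessImageContent unless it is added to EXCLUDE_VISION_MODEL_REGEXES.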