From 86f86962fb0725b888cee6ebd9eb9f818a0c9cee Mon Sep 17 00:00:00 2001
From: Shenghang Tsai
Date: Mon, 10 Feb 2025 13:37:48 +0800
Subject: [PATCH] Support VLM on SiliconFlow

---
 app/client/platforms/siliconflow.ts | 8 ++++++--
 app/constant.ts                     | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/app/client/platforms/siliconflow.ts b/app/client/platforms/siliconflow.ts
index 1ad316a6143..17650a9c69b 100644
--- a/app/client/platforms/siliconflow.ts
+++ b/app/client/platforms/siliconflow.ts
@@ -13,7 +13,7 @@ import {
   ChatMessageTool,
   usePluginStore,
 } from "@/app/store";
-import { streamWithThink } from "@/app/utils/chat";
+import { preProcessImageContent, streamWithThink } from "@/app/utils/chat";
 import {
   ChatOptions,
   getHeaders,
@@ -25,6 +25,7 @@ import { getClientConfig } from "@/app/config/client";
 import {
   getMessageTextContent,
   getMessageTextContentWithoutThinking,
+  isVisionModel,
 } from "@/app/utils";
 import { RequestPayload } from "./openai";
 import { fetch } from "@/app/utils/stream";
@@ -71,13 +72,16 @@ export class SiliconflowApi implements LLMApi {
   }
 
   async chat(options: ChatOptions) {
+    const visionModel = isVisionModel(options.config.model);
     const messages: ChatOptions["messages"] = [];
     for (const v of options.messages) {
       if (v.role === "assistant") {
         const content = getMessageTextContentWithoutThinking(v);
         messages.push({ role: v.role, content });
       } else {
-        const content = getMessageTextContent(v);
+        const content = visionModel
+          ? await preProcessImageContent(v.content)
+          : getMessageTextContent(v);
         messages.push({ role: v.role, content });
       }
     }
diff --git a/app/constant.ts b/app/constant.ts
index 09eec44b68d..d9cb62bf934 100644
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -462,6 +462,7 @@ export const VISION_MODEL_REGEXES = [
   /gpt-4-turbo(?!.*preview)/, // Matches "gpt-4-turbo" but not "gpt-4-turbo-preview"
   /^dall-e-3$/, // Matches exactly "dall-e-3"
   /glm-4v/,
+  /vl/i,
 ];
 
 export const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/];
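
Reviewer note: a minimal sketch of what the new /vl/i entry implies, assuming isVisionModel() tests the model id against VISION_MODEL_REGEXES and then drops anything matching EXCLUDE_VISION_MODEL_REGEXES. The helper name and model ids below are illustrative, not taken from this patch:

// Sketch only -- assumes this is how isVisionModel() consults the two regex lists.
const VISION_MODEL_REGEXES = [/glm-4v/, /vl/i];
const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/];

function isVisionModelSketch(model: string): boolean {
  // A model is treated as vision-capable when it matches at least one
  // inclusion regex and no exclusion regex.
  return (
    VISION_MODEL_REGEXES.some((r) => r.test(model)) &&
    !EXCLUDE_VISION_MODEL_REGEXES.some((r) => r.test(model))
  );
}

isVisionModelSketch("Qwen/Qwen2-VL-72B-Instruct"); // true  -- /vl/i matches "VL"
isVisionModelSketch("deepseek-ai/deepseek-vl2");   // true  -- /vl/i matches "vl"
isVisionModelSketch("deepseek-ai/DeepSeek-V3");    // false -- no regex matches

Because /vl/i matches the substring "vl" anywhere in the id, case-insensitively, it covers VL-family checkpoints without listing each one; the trade-off is that any future model id containing "vl" will also be routed through preProcessImageContent unless it is added to EXCLUDE_VISION_MODEL_REGEXES.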