✨ feat: add vLLM provider support (#6154)
* ✨ feat: add vLLM provider support

* 💄 style: update model list
hezhijie0327 authored Feb 14, 2025
1 parent 6b6bd5c commit 1708e32
Showing 12 changed files with 186 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Dockerfile
@@ -217,6 +217,8 @@ ENV \
TOGETHERAI_API_KEY="" TOGETHERAI_MODEL_LIST="" \
# Upstage
UPSTAGE_API_KEY="" UPSTAGE_MODEL_LIST="" \
# vLLM
VLLM_API_KEY="" VLLM_MODEL_LIST="" VLLM_PROXY_URL="" \
# Wenxin
WENXIN_API_KEY="" WENXIN_MODEL_LIST="" \
# xAI
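VLLM_API_KEY and VLLM_MODEL_LIST mirror the key/model-list pair used by the other providers; the extra VLLM_PROXY_URL points at the OpenAI-compatible base URL of a self-hosted vLLM server (the provider card below uses http://localhost:8000/v1 as its placeholder).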
2 changes: 2 additions & 0 deletions Dockerfile.database
@@ -254,6 +254,8 @@ ENV \
TOGETHERAI_API_KEY="" TOGETHERAI_MODEL_LIST="" \
# Upstage
UPSTAGE_API_KEY="" UPSTAGE_MODEL_LIST="" \
# vLLM
VLLM_API_KEY="" VLLM_MODEL_LIST="" VLLM_PROXY_URL="" \
# Wenxin
WENXIN_API_KEY="" WENXIN_MODEL_LIST="" \
# xAI
@@ -28,6 +28,7 @@ import {
TaichuProviderCard,
TogetherAIProviderCard,
UpstageProviderCard,
VLLMProviderCard,
WenxinProviderCard,
XAIProviderCard,
ZeroOneProviderCard,
@@ -57,6 +58,7 @@ export const useProviderList = (): ProviderItem[] => {
OpenAIProvider,
AzureProvider,
OllamaProvider,
VLLMProviderCard,
AnthropicProviderCard,
BedrockProvider,
GoogleProviderCard,
3 changes: 3 additions & 0 deletions src/config/aiModels/index.ts
@@ -37,6 +37,7 @@ import { default as taichu } from './taichu';
import { default as tencentcloud } from './tencentcloud';
import { default as togetherai } from './togetherai';
import { default as upstage } from './upstage';
import { default as vllm } from './vllm';
import { default as wenxin } from './wenxin';
import { default as xai } from './xai';
import { default as zeroone } from './zeroone';
@@ -99,6 +100,7 @@ export const LOBE_DEFAULT_MODEL_LIST = buildDefaultModelList({
tencentcloud,
togetherai,
upstage,
vllm,
wenxin,
xai,
zeroone,
@@ -142,6 +144,7 @@ export { default as taichu } from './taichu';
export { default as tencentcloud } from './tencentcloud';
export { default as togetherai } from './togetherai';
export { default as upstage } from './upstage';
export { default as vllm } from './vllm';
export { default as wenxin } from './wenxin';
export { default as xai } from './xai';
export { default as zeroone } from './zeroone';
94 changes: 94 additions & 0 deletions src/config/aiModels/vllm.ts
@@ -0,0 +1,94 @@
import { AIChatModelCard } from '@/types/aiModel';

const vllmChatModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true
},
contextWindowTokens: 128_000,
description:
'Llama 3.1 is the leading model family from Meta, scaling up to 405B parameters, suited to complex dialogue, multilingual translation, and data analysis.',
displayName: 'Llama 3.1 70B',
enabled: true,
id: 'meta-llama/Meta-Llama-3.1-70B',
type: 'chat',
},
{
abilities: {
functionCall: true
},
contextWindowTokens: 128_000,
description:
'Llama 3.1 is the leading model family from Meta, scaling up to 405B parameters, suited to complex dialogue, multilingual translation, and data analysis.',
displayName: 'Llama 3.1 405B Instruct',
id: 'meta-llama/Meta-Llama-3.1-405B-Instruct',
type: 'chat',
},
{
contextWindowTokens: 8192,
description:
'Gemma 2 is an efficient model from Google, covering a wide range of use cases from small applications to complex data processing.',
displayName: 'Gemma 2 9B',
id: 'google/gemma-2-9b',
type: 'chat',
},
{
contextWindowTokens: 8192,
description:
'Gemma 2 is an efficient model from Google, covering a wide range of use cases from small applications to complex data processing.',
displayName: 'Gemma 2 27B',
id: 'google/gemma-2-27b',
type: 'chat',
},
{
contextWindowTokens: 8192,
description:
'Mistral (7B) Instruct is known for high performance and suits a wide range of language tasks.',
displayName: 'Mistral 7B Instruct v0.1',
id: 'mistralai/Mistral-7B-Instruct-v0.1',
type: 'chat',
},
{
contextWindowTokens: 32_768,
description:
'Mixtral-8x7B Instruct (46.7B) provides a high-capacity compute framework, well suited to large-scale data processing.',
displayName: 'Mixtral 8x7B Instruct v0.1',
id: 'mistralai/Mixtral-8x7B-Instruct-v0.1',
type: 'chat',
},
{
abilities: {
functionCall: true
},
contextWindowTokens: 65_536,
description:
'DeepSeek-V3 is a 671B-parameter mixture-of-experts (MoE) language model that combines Multi-head Latent Attention (MLA) and the DeepSeekMoE architecture with an auxiliary-loss-free load-balancing strategy to optimize inference and training efficiency. Pretrained on 14.8 trillion high-quality tokens and further refined with supervised fine-tuning and reinforcement learning, DeepSeek-V3 outperforms other open-source models and approaches leading closed-source models.',
displayName: 'DeepSeek V3',
enabled: true,
id: 'deepseek-ai/DeepSeek-V3',
type: 'chat',
},
{
abilities: {
reasoning: true
},
contextWindowTokens: 32_768,
description: 'Qwen QwQ is an experimental research model developed by the Qwen team, focused on advancing AI reasoning capabilities.',
displayName: 'QwQ 32B Preview',
enabled: true,
id: 'Qwen/QwQ-32B-Preview',
type: 'chat',
},
{
contextWindowTokens: 32_768,
description: 'Qwen2-7B-Instruct is the 7B-parameter instruction-tuned large language model in the Qwen2 series. Built on the Transformer architecture, it adopts the SwiGLU activation, attention QKV bias, and grouped-query attention, and can handle large-scale inputs. It performs strongly on benchmarks covering language understanding, generation, multilingual ability, coding, math, and reasoning, surpassing most open-source models and showing competitiveness with proprietary models on some tasks. It also outperforms Qwen1.5-7B-Chat across multiple evaluations, a marked improvement.',
displayName: 'Qwen2 7B Instruct',
enabled: true,
id: 'Qwen/Qwen2-7B-Instruct',
type: 'chat',
},
];

export const allModels = [...vllmChatModels];

export default allModels;
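These cards supply default metadata only: as the runtime code in src/libs/agent-runtime/vllm/index.ts (below) shows, ids fetched from a live vLLM server are matched case-insensitively against LOBE_DEFAULT_MODEL_LIST, and a matching entry contributes its display name, context window, and ability flags.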
6 changes: 6 additions & 0 deletions src/config/llm.ts
@@ -68,6 +68,9 @@ export const getLLMConfig = () => {

ENABLED_OLLAMA: z.boolean(),

ENABLED_VLLM: z.boolean(),
VLLM_API_KEY: z.string().optional(),

ENABLED_QWEN: z.boolean(),
QWEN_API_KEY: z.string().optional(),

@@ -196,6 +199,9 @@ export const getLLMConfig = () => {

ENABLED_OLLAMA: process.env.ENABLED_OLLAMA !== '0',

ENABLED_VLLM: !!process.env.VLLM_API_KEY,
VLLM_API_KEY: process.env.VLLM_API_KEY,

ENABLED_QWEN: !!process.env.QWEN_API_KEY,
QWEN_API_KEY: process.env.QWEN_API_KEY,

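The two hunks follow the pattern established by the other providers: the flag is validated as a required boolean, the key as an optional string, and the provider is switched on exactly when VLLM_API_KEY is non-empty. A minimal standalone sketch of the same validation step, assuming zod (which this config file already uses as z); the schema shape is copied from the hunk above:

import { z } from 'zod';

// Sketch, not part of the commit: ENABLED_VLLM is derived from the key, while
// the key itself stays optional, so unauthenticated vLLM setups can still be
// enabled by exporting a placeholder value such as 'EMPTY'.
const vllmEnv = z
  .object({
    ENABLED_VLLM: z.boolean(),
    VLLM_API_KEY: z.string().optional(),
  })
  .parse({
    ENABLED_VLLM: !!process.env.VLLM_API_KEY,
    VLLM_API_KEY: process.env.VLLM_API_KEY,
  });

console.log(vllmEnv.ENABLED_VLLM);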
4 changes: 4 additions & 0 deletions src/config/modelProviders/index.ts
@@ -37,6 +37,7 @@ import TaichuProvider from './taichu';
import TencentcloudProvider from './tencentcloud';
import TogetherAIProvider from './togetherai';
import UpstageProvider from './upstage';
import VLLMProvider from './vllm';
import WenxinProvider from './wenxin';
import XAIProvider from './xai';
import ZeroOneProvider from './zeroone';
@@ -58,6 +59,7 @@ export const LOBE_DEFAULT_MODEL_LIST: ChatModelCard[] = [
MistralProvider.chatModels,
MoonshotProvider.chatModels,
OllamaProvider.chatModels,
VLLMProvider.chatModels,
OpenRouterProvider.chatModels,
TogetherAIProvider.chatModels,
FireworksAIProvider.chatModels,
@@ -89,6 +91,7 @@ export const DEFAULT_MODEL_PROVIDER_LIST = [
OpenAIProvider,
{ ...AzureProvider, chatModels: [] },
OllamaProvider,
VLLMProvider,
AnthropicProvider,
BedrockProvider,
GoogleProvider,
@@ -175,6 +178,7 @@ export { default as TaichuProviderCard } from './taichu';
export { default as TencentCloudProviderCard } from './tencentcloud';
export { default as TogetherAIProviderCard } from './togetherai';
export { default as UpstageProviderCard } from './upstage';
export { default as VLLMProviderCard } from './vllm';
export { default as WenxinProviderCard } from './wenxin';
export { default as XAIProviderCard } from './xai';
export { default as ZeroOneProviderCard } from './zeroone';
20 changes: 20 additions & 0 deletions src/config/modelProviders/vllm.ts
@@ -0,0 +1,20 @@
import { ModelProviderCard } from '@/types/llm';

const VLLM: ModelProviderCard = {
chatModels: [],
description: 'vLLM is a fast and easy-to-use library for LLM inference and serving.',
id: 'vllm',
modelList: { showModelFetcher: true },
modelsUrl: 'https://docs.vllm.ai/en/latest/models/supported_models.html#supported-models',
name: 'vLLM',
settings: {
proxyUrl: {
placeholder: 'http://localhost:8000/v1',
},
sdkType: 'openai',
showModelFetcher: true,
},
url: 'https://docs.vllm.ai',
};

export default VLLM;
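The proxyUrl placeholder assumes a self-hosted vLLM server exposing the OpenAI-compatible REST API. A quick TypeScript probe of that endpoint, as a sketch (not part of the commit; the URL default and key handling are assumptions):

// List the models served by a local vLLM instance. The Authorization header
// only matters if the server was started with an API key; otherwise vLLM
// conventionally accepts any placeholder.
const base = process.env.VLLM_PROXY_URL ?? 'http://localhost:8000/v1';

const res = await fetch(`${base}/models`, {
  headers: { Authorization: `Bearer ${process.env.VLLM_API_KEY ?? 'EMPTY'}` },
});
const { data } = (await res.json()) as { data: { id: string }[] };
console.log(data.map((m) => m.id)); // ids reported by this vLLM instance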
7 changes: 7 additions & 0 deletions src/libs/agent-runtime/AgentRuntime.ts
@@ -49,6 +49,7 @@ import {
TextToSpeechPayload,
} from './types';
import { LobeUpstageAI } from './upstage';
import { LobeVLLMAI } from './vllm';
import { LobeWenxinAI } from './wenxin';
import { LobeXAI } from './xai';
import { LobeZeroOneAI } from './zeroone';
@@ -172,6 +173,7 @@ class AgentRuntime {
tencentcloud: Partial<ClientOptions>;
togetherai: Partial<ClientOptions>;
upstage: Partial<ClientOptions>;
vllm: Partial<ClientOptions>;
wenxin: Partial<ClientOptions>;
xai: Partial<ClientOptions>;
zeroone: Partial<ClientOptions>;
@@ -227,6 +229,11 @@ class AgentRuntime {
break;
}

case ModelProvider.VLLM: {
runtimeModel = new LobeVLLMAI(params.vllm);
break;
}

case ModelProvider.Perplexity: {
runtimeModel = new LobePerplexityAI(params.perplexity);
break;
1 change: 1 addition & 0 deletions src/libs/agent-runtime/types/type.ts
@@ -59,6 +59,7 @@ export enum ModelProvider {
TencentCloud = 'tencentcloud',
TogetherAI = 'togetherai',
Upstage = 'upstage',
VLLM = 'vllm',
Wenxin = 'wenxin',
XAI = 'xai',
ZeroOne = 'zeroone',
44 changes: 44 additions & 0 deletions src/libs/agent-runtime/vllm/index.ts
@@ -0,0 +1,44 @@
import { ModelProvider } from '../types';
import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';

import type { ChatModelCard } from '@/types/llm';

export interface VLLMModelCard {
id: string;
}

export const LobeVLLMAI = LobeOpenAICompatibleFactory({
baseURL: 'http://localhost:8000/v1',
debug: {
chatCompletion: () => process.env.DEBUG_VLLM_CHAT_COMPLETION === '1',
},
models: async ({ client }) => {
const { LOBE_DEFAULT_MODEL_LIST } = await import('@/config/aiModels');

const modelsPage = await client.models.list() as any;
const modelList: VLLMModelCard[] = modelsPage.data;

return modelList
.map((model) => {
const knownModel = LOBE_DEFAULT_MODEL_LIST.find((m) => model.id.toLowerCase() === m.id.toLowerCase());

return {
contextWindowTokens: knownModel?.contextWindowTokens ?? undefined,
displayName: knownModel?.displayName ?? undefined,
enabled: knownModel?.enabled || false,
functionCall:
knownModel?.abilities?.functionCall
|| false,
id: model.id,
reasoning:
knownModel?.abilities?.reasoning
|| false,
vision:
knownModel?.abilities?.vision
|| false,
};
})
.filter(Boolean) as ChatModelCard[];
},
provider: ModelProvider.VLLM,
});
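Since LobeVLLMAI is produced by LobeOpenAICompatibleFactory, the traffic it generates is a plain OpenAI-style chat completion against the configured baseURL. A sketch of the equivalent call with the official openai SDK (model id and URL are illustrative, not prescribed by the commit):

import OpenAI from 'openai';

// 'EMPTY' is the conventional placeholder key for vLLM servers started
// without authentication.
const client = new OpenAI({
  apiKey: process.env.VLLM_API_KEY ?? 'EMPTY',
  baseURL: process.env.VLLM_PROXY_URL ?? 'http://localhost:8000/v1',
});

const completion = await client.chat.completions.create({
  messages: [{ content: 'Say hello.', role: 'user' }],
  model: 'Qwen/Qwen2-7B-Instruct',
});

console.log(completion.choices[0]?.message.content);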
1 change: 1 addition & 0 deletions src/types/user/settings/keyVaults.ts
@@ -65,6 +65,7 @@ export interface UserKeyVaults {
tencentcloud?: OpenAICompatibleKeyVault;
togetherai?: OpenAICompatibleKeyVault;
upstage?: OpenAICompatibleKeyVault;
vllm?: OpenAICompatibleKeyVault;
wenxin?: OpenAICompatibleKeyVault;
xai?: OpenAICompatibleKeyVault;
zeroone?: OpenAICompatibleKeyVault;
