From 6ae38300613a32a1bc6ff4d09c80c3266b8c7ddf Mon Sep 17 00:00:00 2001 From: Sachin Padmanabhan Date: Tue, 25 Feb 2025 10:58:40 -0800 Subject: [PATCH] add models incl * claude 3.7 on bedrock * anthropic on vertex * perplexity r1 1776 * mistral saba * gemini learnlm some display and info changes --- packages/proxy/schema/index.ts | 31 +++- packages/proxy/schema/models.ts | 274 ++++++++++++++++++++++++++------ packages/proxy/src/proxy.ts | 44 ++++- 3 files changed, 291 insertions(+), 58 deletions(-) diff --git a/packages/proxy/schema/index.ts b/packages/proxy/schema/index.ts index 26c752a..ddd403c 100644 --- a/packages/proxy/schema/index.ts +++ b/packages/proxy/schema/index.ts @@ -139,6 +139,7 @@ export const AvailableEndpointTypes: { [name: string]: ModelEndpointType[] } = { "sonar-pro": ["perplexity"], "sonar-reasoning": ["perplexity"], "sonar-reasoning-pro": ["perplexity"], + "r1-1776": ["perplexity"], "meta/llama-2-70b-chat": ["replicate"], "mistralai/Mistral-7B-Instruct-v0.1": ["together"], "mistralai/Mixtral-8x22B": ["together"], @@ -196,6 +197,8 @@ export const AvailableEndpointTypes: { [name: string]: ModelEndpointType[] } = { "codestral-latest": ["mistral"], "open-mixtral-8x22b": ["mistral"], "open-codestral-mamba": ["mistral"], + "mistral-saba-latest": ["mistral"], + "mistral-saba-2502": ["mistral"], "mistral-tiny": ["mistral"], "mistral-small": ["mistral"], "mistral-medium": ["mistral"], @@ -229,6 +232,11 @@ export const AvailableEndpointTypes: { [name: string]: ModelEndpointType[] } = { "llama-3.2-11b-vision-preview": ["groq"], "llama-3.2-3b-preview": ["groq"], "llama-3.2-1b-preview": ["groq"], + "llama-guard-3-8b": ["groq"], + "deepseek-r1-distill-llama-70b-specdec": ["groq"], + "deepseek-r1-distill-qwen-32b": ["groq"], + "qwen-2.5-32b": ["groq"], + "qwen-2.5-coder-32b": ["groq"], "llama3-3-70b": ["lepton"], "llama3-2-3b": ["lepton"], "llama3-2-1b": ["lepton"], @@ -262,12 +270,13 @@ export const AvailableEndpointTypes: { [name: string]: ModelEndpointType[] } = { "accounts/fireworks/models/mistral-small-24b-instruct-2501": ["fireworks"], "accounts/fireworks/models/mixtral-8x7b-instruct": ["fireworks"], "accounts/fireworks/models/phi-3-vision-128k-instruct": ["fireworks"], - "anthropic.claude-3-5-sonnet-20241022-v2:0": ["bedrock"], - "anthropic.claude-3-5-haiku-20241022-v1:0": ["bedrock"], - "anthropic.claude-3-opus-20240229-v1:0": ["bedrock"], - "anthropic.claude-3-haiku-20240307-v1:0": ["bedrock"], - "anthropic.claude-3-sonnet-20240229-v1:0": ["bedrock"], + "us.anthropic.claude-3-7-sonnet-20250219-v1:0": ["bedrock"], + "us.anthropic.claude-3-5-haiku-20241022-v1:0": ["bedrock"], + "us.anthropic.claude-3-5-sonnet-20241022-v2:0": ["bedrock"], "anthropic.claude-3-5-sonnet-20240620-v1:0": ["bedrock"], + "us.anthropic.claude-3-opus-20240229-v1:0": ["bedrock"], + "anthropic.claude-3-sonnet-20240229-v1:0": ["bedrock"], + "anthropic.claude-3-haiku-20240307-v1:0": ["bedrock"], "amazon.nova-pro-v1:0": ["bedrock"], "amazon.nova-lite-v1:0": ["bedrock"], "amazon.nova-micro-v1:0": ["bedrock"], @@ -302,6 +311,18 @@ export const AvailableEndpointTypes: { [name: string]: ModelEndpointType[] } = { "publishers/mistralai/models/mistral-nemo": ["vertex"], "publishers/mistralai/models/codestral-2501": ["vertex"], "publishers/google/models/gemini-2.0-pro-exp-02-05": ["vertex"], + "publishers/anthropic/models/claude-3-7-sonnet": ["vertex"], + "publishers/anthropic/models/claude-3-7-sonnet@20250219": ["vertex"], + "publishers/anthropic/models/claude-3-5-haiku": ["vertex"], + "publishers/anthropic/models/claude-3-5-haiku@20241022": ["vertex"], + "publishers/anthropic/models/claude-3-5-sonnet-v2": ["vertex"], + "publishers/anthropic/models/claude-3-5-sonnet-v2@20241022": ["vertex"], + "publishers/anthropic/models/claude-3-5-sonnet": ["vertex"], + "publishers/anthropic/models/claude-3-5-sonnet@20240620": ["vertex"], + "publishers/anthropic/models/claude-3-opus": ["vertex"], + "publishers/anthropic/models/claude-3-opus@20240229": ["vertex"], + "publishers/anthropic/models/claude-3-haiku": ["vertex"], + "publishers/anthropic/models/claude-3-haiku@20240307": ["vertex"], }; export function getModelEndpointTypes(model: string): ModelEndpointType[] { diff --git a/packages/proxy/schema/models.ts b/packages/proxy/schema/models.ts index 36e1aed..912dc6d 100644 --- a/packages/proxy/schema/models.ts +++ b/packages/proxy/schema/models.ts @@ -395,6 +395,22 @@ export const AvailableModels: { [name: string]: ModelSpec } = { output_cost_per_mil_tokens: 15, parent: "claude-3-7-sonnet-latest", }, + "claude-3-5-haiku-latest": { + format: "anthropic", + flavor: "chat", + multimodal: true, + input_cost_per_mil_tokens: 0.8, + output_cost_per_mil_tokens: 4, + displayName: "Claude 3.5 Haiku", + }, + "claude-3-5-haiku-20241022": { + format: "anthropic", + flavor: "chat", + multimodal: true, + input_cost_per_mil_tokens: 0.8, + output_cost_per_mil_tokens: 4, + parent: "claude-3-5-haiku-latest", + }, "claude-3-5-sonnet-latest": { format: "anthropic", flavor: "chat", @@ -419,20 +435,6 @@ export const AvailableModels: { [name: string]: ModelSpec } = { output_cost_per_mil_tokens: 15, parent: "claude-3-5-sonnet-latest", }, - "claude-3-5-haiku-latest": { - format: "anthropic", - flavor: "chat", - input_cost_per_mil_tokens: 1, - output_cost_per_mil_tokens: 5, - displayName: "Claude 3.5 Haiku", - }, - "claude-3-5-haiku-20241022": { - format: "anthropic", - flavor: "chat", - input_cost_per_mil_tokens: 1, - output_cost_per_mil_tokens: 5, - parent: "claude-3-5-haiku-latest", - }, "claude-3-opus-latest": { format: "anthropic", flavor: "chat", @@ -577,6 +579,13 @@ export const AvailableModels: { [name: string]: ModelSpec } = { output_cost_per_mil_tokens: 8, displayName: "Sonar Reasoning Pro", }, + "r1-1776": { + format: "openai", + flavor: "chat", + input_cost_per_mil_tokens: 2, + output_cost_per_mil_tokens: 8, + displayName: "R1 1776", + }, // TOGETHER MODELS // Together Meta. @@ -1042,6 +1051,20 @@ export const AvailableModels: { [name: string]: ModelSpec } = { output_cost_per_mil_tokens: 0.04, parent: "ministral-3b-latest", }, + "mistral-saba-latest": { + format: "openai", + flavor: "chat", + input_cost_per_mil_tokens: 0.2, + output_cost_per_mil_tokens: 0.6, + displayName: "Mistral Saba", + }, + "mistral-saba-2502": { + format: "openai", + flavor: "chat", + input_cost_per_mil_tokens: 0.2, + output_cost_per_mil_tokens: 0.6, + parent: "mistral-saba-latest", + }, "pixtral-12b-2409": { format: "openai", flavor: "chat", @@ -1140,6 +1163,13 @@ export const AvailableModels: { [name: string]: ModelSpec } = { output_cost_per_mil_tokens: 0.1, displayName: "Llama 3 8B 8k", }, + "llama-guard-3-8b": { + format: "openai", + flavor: "chat", + input_cost_per_mil_tokens: 0.2, + output_cost_per_mil_tokens: 0.2, + displayName: "Llama Guard 3 8B 8k", + }, "gemma2-9b-it": { format: "openai", flavor: "chat", @@ -1155,14 +1185,6 @@ export const AvailableModels: { [name: string]: ModelSpec } = { displayName: "Mixtral 8x7B 32k", }, // Groq experimental. - "deepseek-r1-distill-llama-70b": { - format: "openai", - flavor: "chat", - input_cost_per_mil_tokens: 0.23, - output_cost_per_mil_tokens: 0.69, - displayName: "DeepSeek R1 Distill Llama 70b", - experimental: true, - }, "llama-3.3-70b-specdec": { format: "openai", flavor: "chat", @@ -1205,6 +1227,46 @@ export const AvailableModels: { [name: string]: ModelSpec } = { displayName: "Llama 3.2 1B (Preview) 8k", experimental: true, }, + "deepseek-r1-distill-llama-70b": { + format: "openai", + flavor: "chat", + input_cost_per_mil_tokens: 0.23, + output_cost_per_mil_tokens: 0.69, + displayName: "DeepSeek R1 Distill Llama 70b", + experimental: true, + }, + "deepseek-r1-distill-llama-70b-specdec": { + format: "openai", + flavor: "chat", + input_cost_per_mil_tokens: 0.23, + output_cost_per_mil_tokens: 0.69, + displayName: "DeepSeek R1 Distill Llama 70b SpecDec", + experimental: true, + }, + "deepseek-r1-distill-qwen-32b": { + format: "openai", + flavor: "chat", + input_cost_per_mil_tokens: 0.69, + output_cost_per_mil_tokens: 0.69, + displayName: "DeepSeek R1 Distill Qwen 32B 128k", + experimental: true, + }, + "qwen-2.5-32b": { + format: "openai", + flavor: "chat", + input_cost_per_mil_tokens: 0.79, + output_cost_per_mil_tokens: 0.79, + displayName: "Qwen 2.5 32B Instruct 128k", + experimental: true, + }, + "qwen-2.5-coder-32b": { + format: "openai", + flavor: "chat", + input_cost_per_mil_tokens: 0.79, + output_cost_per_mil_tokens: 0.79, + displayName: "Qwen 2.5 Coder 32B Instruct 128k", + experimental: true, + }, // Groq deprecated. "gemma-7b-it": { format: "openai", @@ -1622,6 +1684,14 @@ export const AvailableModels: { [name: string]: ModelSpec } = { multimodal: true, experimental: true, }, + "learnlm-1.5-pro-experimental": { + format: "google", + flavor: "chat", + input_cost_per_mil_tokens: 0, + output_cost_per_mil_tokens: 0, + multimodal: true, + experimental: true, + }, // Gemini deprecated. "gemini-1.0-pro": { format: "google", @@ -1745,31 +1815,39 @@ export const AvailableModels: { [name: string]: ModelSpec } = { output_cost_per_mil_tokens: 0.2, displayName: "Titan Text Lite", }, - "anthropic.claude-3-5-sonnet-20241022-v2:0": { + "us.anthropic.claude-3-7-sonnet-20250219-v1:0": { format: "anthropic", flavor: "chat", multimodal: true, input_cost_per_mil_tokens: 3, output_cost_per_mil_tokens: 15, - displayName: "Claude 3.5 Sonnet v2", + displayName: "Claude 3.7 Sonnet", }, - "anthropic.claude-3-5-sonnet-20240620-v1:0": { + "us.anthropic.claude-3-5-haiku-20241022-v1:0": { + format: "anthropic", + flavor: "chat", + multimodal: true, + input_cost_per_mil_tokens: 0.8, + output_cost_per_mil_tokens: 4, + displayName: "Claude 3.5 Haiku", + }, + "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { format: "anthropic", flavor: "chat", multimodal: true, input_cost_per_mil_tokens: 3, output_cost_per_mil_tokens: 15, - displayName: "Claude 3.5 Sonnet", + displayName: "Claude 3.5 Sonnet v2", }, - "anthropic.claude-3-5-haiku-20241022-v1:0": { + "anthropic.claude-3-5-sonnet-20240620-v1:0": { format: "anthropic", flavor: "chat", multimodal: true, - input_cost_per_mil_tokens: 0.8, - output_cost_per_mil_tokens: 4, - displayName: "Claude 3.5 Haiku", + input_cost_per_mil_tokens: 3, + output_cost_per_mil_tokens: 15, + displayName: "Claude 3.5 Sonnet", }, - "anthropic.claude-3-opus-20240229-v1:0": { + "us.anthropic.claude-3-opus-20240229-v1:0": { format: "anthropic", flavor: "chat", multimodal: true, @@ -2003,33 +2081,92 @@ export const AvailableModels: { [name: string]: ModelSpec } = { flavor: "chat", parent: "publishers/google/models/gemini-1.0-pro", }, - "publishers/meta/models/llama-3.3-70b-instruct-maas": { - format: "openai", + "publishers/anthropic/models/claude-3-5-haiku": { + format: "anthropic", flavor: "chat", - displayName: "Llama 3.3 70B Instruct", + displayName: "Claude 3.5 Haiku", + input_cost_per_mil_tokens: 0.8, + output_cost_per_mil_tokens: 4, + multimodal: true, }, - "publishers/meta/models/llama-3.2-90b-vision-instruct-maas": { - format: "openai", + "publishers/anthropic/models/claude-3-5-haiku@20241022": { + format: "anthropic", flavor: "chat", - displayName: "Llama 3.2 90B Vision Instruct", + input_cost_per_mil_tokens: 0.8, + output_cost_per_mil_tokens: 4, multimodal: true, + parent: "publishers/anthropic/models/claude-3-5-haiku", }, - "publishers/meta/models/llama-3.1-401b-instruct-maas": { - format: "openai", + "publishers/anthropic/models/claude-3-5-sonnet-v2": { + format: "anthropic", flavor: "chat", - displayName: "Llama 3.1 401B Instruct", - input_cost_per_mil_tokens: 5, - output_cost_per_mil_tokens: 16, + displayName: "Claude 3.5 Sonnet v2", + input_cost_per_mil_tokens: 3, + output_cost_per_mil_tokens: 15, + multimodal: true, }, - "publishers/meta/models/llama-3.1-70b-instruct-maas": { - format: "openai", + "publishers/anthropic/models/claude-3-5-sonnet-v2@20241022": { + format: "anthropic", flavor: "chat", - displayName: "Llama 3.1 70B Instruct", + input_cost_per_mil_tokens: 3, + output_cost_per_mil_tokens: 15, + multimodal: true, + parent: "publishers/anthropic/models/claude-3-5-sonnet-v2", }, - "publishers/meta/models/llama-3.1-8b-instruct-maas": { + "publishers/anthropic/models/claude-3-5-sonnet": { + format: "anthropic", + flavor: "chat", + displayName: "Claude 3.5 Sonnet", + input_cost_per_mil_tokens: 3, + output_cost_per_mil_tokens: 15, + multimodal: true, + }, + "publishers/anthropic/models/claude-3-5-sonnet@20240620": { + format: "anthropic", + flavor: "chat", + input_cost_per_mil_tokens: 3, + output_cost_per_mil_tokens: 15, + multimodal: true, + parent: "publishers/anthropic/models/claude-3-5-sonnet", + }, + "publishers/anthropic/models/claude-3-opus": { + format: "anthropic", + flavor: "chat", + displayName: "Claude 3 Opus", + input_cost_per_mil_tokens: 15, + output_cost_per_mil_tokens: 75, + multimodal: true, + }, + "publishers/anthropic/models/claude-3-opus@20240229": { + format: "anthropic", + flavor: "chat", + input_cost_per_mil_tokens: 15, + output_cost_per_mil_tokens: 75, + multimodal: true, + parent: "publishers/anthropic/models/claude-3-opus", + }, + "publishers/anthropic/models/claude-3-haiku": { + format: "anthropic", + flavor: "chat", + displayName: "Claude 3 Haiku", + input_cost_per_mil_tokens: 0.25, + output_cost_per_mil_tokens: 1.25, + multimodal: true, + }, + "publishers/anthropic/models/claude-3-haiku@20240307": { + format: "anthropic", + flavor: "chat", + input_cost_per_mil_tokens: 0.25, + output_cost_per_mil_tokens: 1.25, + multimodal: true, + parent: "publishers/anthropic/models/claude-3-haiku", + }, + "publishers/meta/models/llama-3.1-401b-instruct-maas": { format: "openai", flavor: "chat", - displayName: "Llama 3.1 8B Instruct", + displayName: "Llama 3.1 401B Instruct", + input_cost_per_mil_tokens: 5, + output_cost_per_mil_tokens: 16, }, "publishers/mistralai/models/mistral-large-2411": { format: "openai", @@ -2060,6 +2197,49 @@ export const AvailableModels: { [name: string]: ModelSpec } = { multimodal: true, experimental: true, }, + "publishers/anthropic/models/claude-3-7-sonnet": { + format: "anthropic", + flavor: "chat", + displayName: "Claude 3.7 Sonnet (Preview)", + input_cost_per_mil_tokens: 3, + output_cost_per_mil_tokens: 15, + multimodal: true, + experimental: true, + }, + "publishers/anthropic/models/claude-3-7-sonnet@20250219": { + format: "anthropic", + flavor: "chat", + input_cost_per_mil_tokens: 3, + output_cost_per_mil_tokens: 15, + multimodal: true, + experimental: true, + parent: "publishers/anthropic/models/claude-3-7-sonnet", + }, + "publishers/meta/models/llama-3.3-70b-instruct-maas": { + format: "openai", + flavor: "chat", + displayName: "Llama 3.3 70B Instruct", + experimental: true, + }, + "publishers/meta/models/llama-3.2-90b-vision-instruct-maas": { + format: "openai", + flavor: "chat", + displayName: "Llama 3.2 90B Vision Instruct", + multimodal: true, + experimental: true, + }, + "publishers/meta/models/llama-3.1-70b-instruct-maas": { + format: "openai", + flavor: "chat", + displayName: "Llama 3.1 70B Instruct", + experimental: true, + }, + "publishers/meta/models/llama-3.1-8b-instruct-maas": { + format: "openai", + flavor: "chat", + displayName: "Llama 3.1 8B Instruct", + experimental: true, + }, "text-block": { format: "js", diff --git a/packages/proxy/src/proxy.ts b/packages/proxy/src/proxy.ts index 31f4df2..a4cd836 100644 --- a/packages/proxy/src/proxy.ts +++ b/packages/proxy/src/proxy.ts @@ -1091,7 +1091,14 @@ async function fetchModel( ); case "anthropic": console.assert(method === "POST"); - return await fetchAnthropic("POST", url, headers, bodyData, secret); + return await fetchAnthropic( + "POST", + url, + modelSpec, + headers, + bodyData, + secret, + ); case "google": console.assert(method === "POST"); return await fetchGoogle( @@ -1419,6 +1426,7 @@ async function fetchOpenAIFakeStream({ async function fetchAnthropic( method: "POST", url: string, + modelSpec: ModelSpec | null, headers: Record, bodyData: null | any, secret: APISecret, @@ -1426,11 +1434,13 @@ async function fetchAnthropic( console.assert(url === "/chat/completions"); // https://docs.anthropic.com/claude/reference/complete_post - headers["accept"] = "application/json"; - headers["anthropic-version"] = "2023-06-01"; - const fullURL = new URL(EndpointProviderToBaseURL.anthropic + "/messages"); - headers["host"] = fullURL.host; - headers["x-api-key"] = secret.secret; + let fullURL = new URL(EndpointProviderToBaseURL.anthropic + "/messages"); + if (secret.type !== "vertex") { + headers["accept"] = "application/json"; + headers["anthropic-version"] = "2023-06-01"; + headers["host"] = fullURL.host; + headers["x-api-key"] = secret.secret; + } if (isEmpty(bodyData)) { throw new ProxyBadRequestError( @@ -1549,6 +1559,28 @@ async function fetchAnthropic( isFunction, isStructuredOutput, }); + } else if (secret.type === "vertex") { + const { project, authType } = VertexMetadataSchema.parse(secret.metadata); + const locations = modelSpec?.locations?.length + ? modelSpec.locations + : ["us-east5"]; + const location = locations[Math.floor(Math.random() * locations.length)]; + fullURL = new URL( + `https://${location}-aiplatform.googleapis.com/v1/projects/${project}/locations/${location}/${params.model}:${params.stream ? "streamRawPredict" : "rawPredict"}`, + ); + let accessToken: string | null | undefined = undefined; + if (authType === "access_token") { + accessToken = secret.secret; + } else { + // authType === "service_account_key" + accessToken = await getGoogleAccessToken(secret.secret); + } + if (!accessToken) { + throw new Error("Failed to get Google access token"); + } + headers["authorization"] = `Bearer ${accessToken}`; + params["anthropic_version"] = "vertex-2023-10-16"; + delete params.model; } const proxyResponse = await fetch(fullURL.toString(), {