diff --git a/libs/langchain-google-genai/src/tests/chat_models.int.test.ts b/libs/langchain-google-genai/src/tests/chat_models.int.test.ts index ddf5aee6bfc7..765240a73ffa 100644 --- a/libs/langchain-google-genai/src/tests/chat_models.int.test.ts +++ b/libs/langchain-google-genai/src/tests/chat_models.int.test.ts @@ -180,15 +180,15 @@ async function fileToBase64(filePath: string): Promise<string> { return base64String; } -test.skip("Gemini can understand audio", async () => { +test("Gemini can understand audio", async () => { // Update this with the correct path to an audio file on your machine. - const audioPath = - "/Users/bracesproul/code/lang-chain-ai/langchainjs/libs/langchain-google-gauth/src/tests/data/audio.mp3"; - const audioMimeType = "audio/mp3"; + const audioPath = "./src/tests/data/gettysburg10.wav"; + const audioMimeType = "audio/wav"; const model = new ChatGoogleGenerativeAI({ - model: "gemini-1.5-pro-latest", + model: "gemini-1.5-flash", temperature: 0, + maxRetries: 0, }); const audioBase64 = await fileToBase64(audioPath); diff --git a/libs/langchain-google-genai/src/tests/data/gettysburg10.wav b/libs/langchain-google-genai/src/tests/data/gettysburg10.wav new file mode 100644 index 000000000000..10437f541c8b Binary files /dev/null and b/libs/langchain-google-genai/src/tests/data/gettysburg10.wav differ diff --git a/libs/langchain-google-vertexai/src/tests/chat_models.int.test.ts b/libs/langchain-google-vertexai/src/tests/chat_models.int.test.ts index d6c2d2b77dec..a5b9b1001218 100644 --- a/libs/langchain-google-vertexai/src/tests/chat_models.int.test.ts +++ b/libs/langchain-google-vertexai/src/tests/chat_models.int.test.ts @@ -1,4 +1,5 @@ import { test } from "@jest/globals"; +import fs from "fs/promises"; import { BaseLanguageModelInput } from "@langchain/core/language_models/base"; import { ChatPromptValue } from "@langchain/core/prompt_values"; import { @@ -14,6 +15,10 @@ import { import { tool } from "@langchain/core/tools"; import { z } from "zod"; 
import { concat } from "@langchain/core/utils/stream"; +import { + ChatPromptTemplate, + MessagesPlaceholder, +} from "@langchain/core/prompts"; import { GeminiTool } from "../types.js"; import { ChatVertexAI } from "../chat_models.js"; @@ -352,3 +357,47 @@ test("ChatGoogleGenerativeAI can stream tools", async () => { expect(toolCalls[0].name).toBe("current_weather_tool"); expect(toolCalls[0].args).toHaveProperty("location"); }); + +async function fileToBase64(filePath: string): Promise<string> { + const fileData = await fs.readFile(filePath); + const base64String = Buffer.from(fileData).toString("base64"); + return base64String; +} + +test("Gemini can understand audio", async () => { + // Reuses the audio fixture checked into the sibling langchain-google-genai package. + const audioPath = "../langchain-google-genai/src/tests/data/gettysburg10.wav"; + const audioMimeType = "audio/wav"; + + const model = new ChatVertexAI({ + model: "gemini-1.5-flash", + temperature: 0, + maxRetries: 0, + }); + + const audioBase64 = await fileToBase64(audioPath); + + const prompt = ChatPromptTemplate.fromMessages([ + new MessagesPlaceholder("audio"), + ]); + + const chain = prompt.pipe(model); + const response = await chain.invoke({ + audio: new HumanMessage({ + content: [ + { + type: "media", + mimeType: audioMimeType, + data: audioBase64, + }, + { + type: "text", + text: "Summarize the content in this audio. Also, what is the speaker's tone?", + }, + ], + }), + }); + + expect(typeof response.content).toBe("string"); + expect((response.content as string).length).toBeGreaterThan(15); +});