langchain-ai · bracesproul · Aug 16, 2024 · Aug 16, 2024 · Aug 16, 2024
diff --git a/libs/langchain-google-genai/src/tests/chat_models.int.test.ts b/libs/langchain-google-genai/src/tests/chat_models.int.test.ts
@@ -180,15 +180,15 @@ async function fileToBase64(filePath: string): Promise<string> {
   return base64String;
 }
 
-test.skip("Gemini can understand audio", async () => {
+test("Gemini can understand audio", async () => {
   // Update this with the correct path to an audio file on your machine.
-  const audioPath =
-    "/Users/bracesproul/code/lang-chain-ai/langchainjs/libs/langchain-google-gauth/src/tests/data/audio.mp3";
-  const audioMimeType = "audio/mp3";
+  const audioPath = "./src/tests/data/gettysburg10.wav";
+  const audioMimeType = "audio/wav";
 
   const model = new ChatGoogleGenerativeAI({
-    model: "gemini-1.5-pro-latest",
+    model: "gemini-1.5-flash",
     temperature: 0,
+    maxRetries: 0,
   });
 
   const audioBase64 = await fileToBase64(audioPath);

diff --git a/libs/langchain-google-genai/src/tests/data/gettysburg10.wav b/libs/langchain-google-genai/src/tests/data/gettysburg10.wav
diff --git a/libs/langchain-google-vertexai/src/tests/chat_models.int.test.ts b/libs/langchain-google-vertexai/src/tests/chat_models.int.test.ts
@@ -1,4 +1,5 @@
 import { test } from "@jest/globals";
+import fs from "fs/promises";
 import { BaseLanguageModelInput } from "@langchain/core/language_models/base";
 import { ChatPromptValue } from "@langchain/core/prompt_values";
 import {
@@ -14,6 +15,10 @@ import {
 import { tool } from "@langchain/core/tools";
 import { z } from "zod";
 import { concat } from "@langchain/core/utils/stream";
+import {
+  ChatPromptTemplate,
+  MessagesPlaceholder,
+} from "@langchain/core/prompts";
 import { GeminiTool } from "../types.js";
 import { ChatVertexAI } from "../chat_models.js";
 
@@ -352,3 +357,47 @@ test("ChatGoogleGenerativeAI can stream tools", async () => {
   expect(toolCalls[0].name).toBe("current_weather_tool");
   expect(toolCalls[0].args).toHaveProperty("location");
 });
+
+async function fileToBase64(filePath: string): Promise<string> {
+  const fileData = await fs.readFile(filePath);
+  const base64String = Buffer.from(fileData).toString("base64");
+  return base64String;
+}
+
+test("Gemini can understand audio", async () => {
+  // Update this with the correct path to an audio file on your machine.
+  const audioPath = "../langchain-google-genai/src/tests/data/gettysburg10.wav";
+  const audioMimeType = "audio/wav";
+
+  const model = new ChatVertexAI({
+    model: "gemini-1.5-flash",
+    temperature: 0,
+    maxRetries: 0,
+  });
+
+  const audioBase64 = await fileToBase64(audioPath);
+
+  const prompt = ChatPromptTemplate.fromMessages([
+    new MessagesPlaceholder("audio"),
+  ]);
+
+  const chain = prompt.pipe(model);
+  const response = await chain.invoke({
+    audio: new HumanMessage({
+      content: [
+        {
+          type: "media",
+          mimeType: audioMimeType,
+          data: audioBase64,
+        },
+        {
+          type: "text",
+          text: "Summarize the content in this audio. ALso, what is the speaker's tone?",
+        },
+      ],
+    }),
+  });
+
+  expect(typeof response.content).toBe("string");
+  expect((response.content as string).length).toBeGreaterThan(15);
+});