Skip to content

Commit

Permalink
google[patch]: fix: handling multibyte characters in stream for googl…
Browse files Browse the repository at this point in the history
…e-vertexai-web (#6502)

* fix: handling multibyte characters in stream for google-vertexai-web

* fix: update import path to use .js on test
  • Loading branch information
pokutuna authored Aug 16, 2024
1 parent abdcd1f commit e029d5e
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 2 deletions.
55 changes: 55 additions & 0 deletions libs/langchain-google-common/src/tests/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import { expect, test } from "@jest/globals";
import { z } from "zod";
import { zodToGeminiParameters } from "../utils/zod_to_gemini_parameters.js";
import { ReadableJsonStream } from "../utils/stream.js";

test("zodToGeminiParameters can convert zod schema to gemini schema", () => {
const zodSchema = z
Expand Down Expand Up @@ -80,3 +81,57 @@ test("zodToGeminiParameters removes additional properties from arrays", () => {
expect((arrayItemsSchema as any).additionalProperties).toBeUndefined();
}
});

/**
 * Encodes a string into its UTF-8 byte representation.
 *
 * @param data - Text to encode.
 * @returns The UTF-8 bytes of `data`.
 */
function toUint8Array(data: string): Uint8Array {
  const encoder = new TextEncoder();
  const bytes = encoder.encode(data);
  return bytes;
}

// Feeds a JSON array to ReadableJsonStream in several chunks, with the second
// object split across two chunks, and checks that each object is returned in
// order, followed by null and a set streamDone flag.
test("ReadableJsonStream can handle stream", async () => {
  const pieces = ["[", '{"i": 1}', '{"i', '": 2}', "]"];
  const chunks = pieces.map((piece) => toUint8Array(piece));

  const source = new ReadableStream({
    start(controller) {
      // Queue every chunk up front, then signal end of stream.
      for (const chunk of chunks) {
        controller.enqueue(chunk);
      }
      controller.close();
    },
  });

  const stream = new ReadableJsonStream(source);

  const first = await stream.nextChunk();
  expect(first).toEqual({ i: 1 });

  const second = await stream.nextChunk();
  expect(second).toEqual({ i: 2 });

  const afterEnd = await stream.nextChunk();
  expect(afterEnd).toBeNull();

  expect(stream.streamDone).toEqual(true);
});

// Feeds JSON containing multibyte UTF-8 characters to ReadableJsonStream, with
// some characters deliberately split across chunk boundaries, and checks that
// the decoded objects contain the intact characters.
test("ReadableJsonStream can handle multibyte stream", async () => {
  // UTF-8 encoding of "に" is E3 81 AB; it is delivered as 1 byte + 2 bytes.
  const niLeadByte = new Uint8Array([0xe3]);
  const niTrailBytes = new Uint8Array([0x81, 0xab]);
  // UTF-8 encoding of "👋" is F0 9F 91 8B; it is delivered as 2 bytes + 2 bytes.
  const waveFirstHalf = new Uint8Array([0xf0, 0x9f]);
  const waveSecondHalf = new Uint8Array([0x91, 0x8b]);

  const chunks = [
    toUint8Array("["),
    toUint8Array('{"i": 1, "msg":"hello👋"}'),
    toUint8Array('{"i": 2,'),
    toUint8Array('"msg":"こん'),
    niLeadByte,
    niTrailBytes,
    toUint8Array("ちは"),
    waveFirstHalf,
    waveSecondHalf,
    toUint8Array('"}'),
    toUint8Array("]"),
  ];

  const source = new ReadableStream({
    start(controller) {
      // Queue every chunk up front, then signal end of stream.
      for (const chunk of chunks) {
        controller.enqueue(chunk);
      }
      controller.close();
    },
  });

  const stream = new ReadableJsonStream(source);

  const first = await stream.nextChunk();
  expect(first).toEqual({ i: 1, msg: "hello👋" });

  const second = await stream.nextChunk();
  expect(second).toEqual({ i: 2, msg: "こんにちは👋" });

  const afterEnd = await stream.nextChunk();
  expect(afterEnd).toBeNull();

  expect(stream.streamDone).toEqual(true);
});
4 changes: 2 additions & 2 deletions libs/langchain-google-common/src/utils/stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ export class ReadableJsonStream extends JsonStream {

constructor(body: ReadableStream | null) {
super();
this.decoder = new TextDecoder();
this.decoder = new TextDecoder("utf-8");
if (body) {
void this.run(body);
} else {
Expand All @@ -266,7 +266,7 @@ export class ReadableJsonStream extends JsonStream {
while (!isDone) {
const { value, done } = await reader.read();
if (!done) {
const svalue = this.decoder.decode(value);
const svalue = this.decoder.decode(value, { stream: true });
this.appendBuffer(svalue);
} else {
isDone = done;
Expand Down

0 comments on commit e029d5e

Please sign in to comment.