Skip to content

Commit

Permalink
pinecone[patch]: Fix document ID not getting set when returned from P…
Browse files Browse the repository at this point in the history
…ineconeStore (langchain-ai#6539)

* added _formatMatches private method

* updated similaritySearchVectorWithScore to use _formatMatches

* updated maxMarginalRelevanceSearch to use _formatMatches

* Add integration test

---------

Co-authored-by: jacoblee93 <[email protected]>
  • Loading branch information
2 people authored and CarterMorris committed Nov 10, 2024
1 parent d737386 commit 64226da
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 43 deletions.
60 changes: 42 additions & 18 deletions libs/langchain-pinecone/src/tests/vectorstores.int.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/* eslint-disable no-process-env */
/* eslint-disable @typescript-eslint/no-non-null-assertion */
/* eslint-disable no-promise-executor-return */
/* eslint-disable @typescript-eslint/no-explicit-any */
import { describe, expect, test } from "@jest/globals";
import { faker } from "@faker-js/faker";
import { Pinecone } from "@pinecone-database/pinecone";
Expand All @@ -9,12 +10,14 @@ import { SyntheticEmbeddings } from "@langchain/core/utils/testing";
import { Document } from "@langchain/core/documents";
import { PineconeStoreParams, PineconeStore } from "../vectorstores.js";

const PINECONE_SLEEP_LENGTH = 40000;

/**
 * Pause helper for the integration tests.
 * @param ms Delay, in milliseconds, handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<unknown> {
  return new Promise((done) => {
    // Block-bodied executor: nothing is returned from it, the timer simply
    // invokes the resolver when it elapses.
    setTimeout(done, ms);
  });
}

describe.skip("PineconeStore", () => {
describe("PineconeStore", () => {
let pineconeStore: PineconeStore;
const testIndexName = process.env.PINECONE_INDEX!;
let namespaces: string[] = [];
Expand Down Expand Up @@ -57,22 +60,29 @@ describe.skip("PineconeStore", () => {
[{ pageContent, metadata: {} }],
[documentId]
);
await sleep(35000);

await sleep(PINECONE_SLEEP_LENGTH);

const results = await pineconeStore.similaritySearch(pageContent, 1);

expect(results).toEqual([new Document({ metadata: {}, pageContent })]);
expect(results).toEqual([
new Document({ metadata: {}, pageContent, id: documentId }),
]);

await pineconeStore.addDocuments(
[{ pageContent: `${pageContent} upserted`, metadata: {} }],
[documentId]
);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);

const results2 = await pineconeStore.similaritySearch(pageContent, 1);

expect(results2).toEqual([
new Document({ metadata: {}, pageContent: `${pageContent} upserted` }),
new Document({
metadata: {},
pageContent: `${pageContent} upserted`,
id: documentId,
}),
]);
});

Expand All @@ -83,11 +93,15 @@ describe.skip("PineconeStore", () => {
{ pageContent, metadata: { foo: "bar" } },
]);

await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);
const results = await pineconeStore.similaritySearch(pageContent, 1);

expect(results).toEqual([
new Document({ metadata: { foo: "bar" }, pageContent }),
new Document({
metadata: { foo: "bar" },
pageContent,
id: expect.any(String) as any,
}),
]);
});

Expand All @@ -100,14 +114,18 @@ describe.skip("PineconeStore", () => {
{ pageContent, metadata: { foo: id } },
{ pageContent, metadata: { foo: "qux" } },
]);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);
// If the filter wasn't working, we'd get all 3 documents back
const results = await pineconeStore.similaritySearch(pageContent, 3, {
foo: id,
});

expect(results).toEqual([
new Document({ metadata: { foo: id }, pageContent }),
new Document({
metadata: { foo: id },
pageContent,
id: expect.any(String) as any,
}),
]);
});

Expand All @@ -120,7 +138,7 @@ describe.skip("PineconeStore", () => {
{ pageContent, metadata: { foo: id } },
{ pageContent, metadata: { foo: id } },
]);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);
// If the filter wasn't working, we'd get all 3 documents back
const results = await pineconeStore.maxMarginalRelevanceSearch(
pageContent,
Expand All @@ -142,7 +160,7 @@ describe.skip("PineconeStore", () => {
{ pageContent, metadata: { foo: id } },
{ pageContent, metadata: { foo: id } },
]);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);
const results = await pineconeStore.similaritySearch(pageContent, 2, {
foo: id,
});
Expand Down Expand Up @@ -174,7 +192,7 @@ describe.skip("PineconeStore", () => {
ids: [id, id2],
}
);
await sleep(40000);
await sleep(PINECONE_SLEEP_LENGTH);
const indexStats = await pineconeStore.pineconeIndex.describeIndexStats();
expect(indexStats.namespaces).toHaveProperty("");
expect(indexStats.namespaces?.[""].recordCount).toEqual(2);
Expand All @@ -184,7 +202,7 @@ describe.skip("PineconeStore", () => {
await pineconeStore.delete({
deleteAll: true,
});
await sleep(40000);
await sleep(PINECONE_SLEEP_LENGTH);
const indexStats2 = await pineconeStore.pineconeIndex.describeIndexStats();
expect(indexStats2.namespaces).not.toHaveProperty("");
// The new total records should be less than the previous total records
Expand All @@ -209,7 +227,7 @@ describe.skip("PineconeStore", () => {
namespace: namespaces[1],
}
);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);
const results = await pineconeStore.similaritySearch(pageContent, 1, {
namespace: namespaces[0],
});
Expand All @@ -234,22 +252,28 @@ describe.skip("PineconeStore", () => {
});

await store.addDocuments([{ pageContent, metadata: {} }], [documentId]);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);

const results = await store.similaritySearch(pageContent, 1);

expect(results).toEqual([new Document({ metadata: {}, pageContent })]);
expect(results).toEqual([
new Document({ metadata: {}, pageContent, id: documentId }),
]);

await store.addDocuments(
[{ pageContent: `${pageContent} upserted`, metadata: {} }],
[documentId]
);
await sleep(35000);
await sleep(PINECONE_SLEEP_LENGTH);

const results2 = await store.similaritySearch(pageContent, 1);

expect(results2).toEqual([
new Document({ metadata: {}, pageContent: `${pageContent} upserted` }),
new Document({
metadata: {},
pageContent: `${pageContent} upserted`,
id: documentId,
}),
]);
});
});
66 changes: 41 additions & 25 deletions libs/langchain-pinecone/src/vectorstores.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
RecordMetadata,
PineconeRecord,
Index as PineconeIndex,
ScoredPineconeRecord,
} from "@pinecone-database/pinecone";

import type { EmbeddingsInterface } from "@langchain/core/embeddings";
Expand Down Expand Up @@ -401,6 +402,40 @@ export class PineconeStore extends VectorStore {
return results;
}

/**
 * Format the matching results from the Pinecone query.
 *
 * For every match, the metadata entry stored under `this.textKey` becomes the
 * document's `pageContent`, the remaining metadata entries become the
 * document's `metadata`, and the record `id` becomes the document `id`.
 *
 * Matches that carry no numeric score are skipped. A score of `0` is a valid
 * score and is kept.
 * @param matches Matching results from the Pinecone query.
 * @returns An array of `[document, score]` tuples, in the same order as `matches`.
 */
private _formatMatches(
  matches: ScoredPineconeRecord<RecordMetadata>[] = []
): [Document, number][] {
  const documentsWithScores: [Document, number][] = [];

  for (const record of matches) {
    const { id, score } = record;

    // Check for a missing score explicitly: a truthiness test would also
    // discard matches whose score is exactly 0.
    if (typeof score !== "number") {
      continue;
    }

    // A record may have no metadata at all, or metadata without the text key.
    const recordMetadata: RecordMetadata = record.metadata ?? {};
    const { [this.textKey]: pageContent, ...metadata } = recordMetadata;

    documentsWithScores.push([
      new Document({
        id,
        // Fall back to an empty string instead of throwing when the text key
        // is absent from the stored metadata.
        pageContent: pageContent?.toString() ?? "",
        metadata,
      }),
      score,
    ]);
  }

  return documentsWithScores;
}

/**
* Method that performs a similarity search in the Pinecone database and
* returns the results along with their scores.
Expand All @@ -414,20 +449,10 @@ export class PineconeStore extends VectorStore {
k: number,
filter?: PineconeMetadata
): Promise<[Document, number][]> {
const results = await this._runPineconeQuery(query, k, filter);
const result: [Document, number][] = [];

if (results.matches) {
for (const res of results.matches) {
const { [this.textKey]: pageContent, ...metadata } = (res.metadata ??
{}) as PineconeMetadata;
if (res.score) {
result.push([new Document({ metadata, pageContent }), res.score]);
}
}
}
const { matches = [] } = await this._runPineconeQuery(query, k, filter);
const records = this._formatMatches(matches);

return result;
return records;
}

/**
Expand Down Expand Up @@ -457,7 +482,7 @@ export class PineconeStore extends VectorStore {
{ includeValues: true }
);

const matches = results?.matches ?? [];
const { matches = [] } = results;
const embeddingList = matches.map((match) => match.values);

const mmrIndexes = maximalMarginalRelevance(
Expand All @@ -468,17 +493,8 @@ export class PineconeStore extends VectorStore {
);

const topMmrMatches = mmrIndexes.map((idx) => matches[idx]);

const finalResult: Document[] = [];
for (const res of topMmrMatches) {
const { [this.textKey]: pageContent, ...metadata } = (res.metadata ??
{}) as PineconeMetadata;
if (res.score) {
finalResult.push(new Document({ metadata, pageContent }));
}
}

return finalResult;
const records = this._formatMatches(topMmrMatches);
return records.map(([doc, _score]) => doc);
}

/**
Expand Down

0 comments on commit 64226da

Please sign in to comment.