From dbba89e528a89396f6aa93b3353c75eb0ce0ac5c Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Wed, 12 Apr 2023 17:22:14 +0100 Subject: [PATCH 1/3] Fix refine chain, add docs --- .../index_related_chains/document_qa.mdx | 15 ++++++++ examples/src/chains/qa_refine.ts | 36 +++++++++++++++++++ langchain/src/chains/combine_docs_chain.ts | 18 ++++++---- 3 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 examples/src/chains/qa_refine.ts diff --git a/docs/docs/modules/chains/index_related_chains/document_qa.mdx b/docs/docs/modules/chains/index_related_chains/document_qa.mdx index 2c5f0ca8bdcb..e8156bb34a6a 100644 --- a/docs/docs/modules/chains/index_related_chains/document_qa.mdx +++ b/docs/docs/modules/chains/index_related_chains/document_qa.mdx @@ -1,4 +1,9 @@ +--- +hide_table_of_contents: true +--- + import QAExample from "@examples/chains/question_answering.ts"; +import RefineExample from "@examples/chains/qa_refine.ts"; import CodeBlock from "@theme/CodeBlock"; # Document QA Chains @@ -6,4 +11,14 @@ import CodeBlock from "@theme/CodeBlock"; LangChain provides chains used for processing unstructured text data: `StuffDocumentsChain` and `MapReduceDocumentsChain`. These chains are the building blocks for more complex chains for processing unstructured text data and receive both documents and a question as input. They then utilize the language model to provide an answer to the question based on the given documents. +- `StuffDocumentsChain`: This chain is the simplest of the 3 chains and simply injects all documents passed in into the prompt. It then returns the answer to the question, using all documents as context. It is suitable for QA tasks over a small number of documents. +- `MapReduceDocumentsChain`: This chain adds a preprocessing step to select relevant portions of each document until the total number of tokens is less than the maximum number of tokens allowed by the model. It then uses the transformed docs as context to answer the question. 
It is suitable for QA tasks over larger documents. +- `RefineDocumentsChain`: This chain iterates over the documents one by one to update a running answer, using the previous intermediate answer and the next doc as context each time. It is suitable for QA tasks over a large number of documents. + +## Usage, `StuffDocumentsChain` and `MapReduceDocumentsChain` + <CodeBlock language="typescript">{QAExample}</CodeBlock> + +## Usage, `RefineDocumentsChain` + +<CodeBlock language="typescript">{RefineExample}</CodeBlock> diff --git a/examples/src/chains/qa_refine.ts b/examples/src/chains/qa_refine.ts new file mode 100644 index 000000000000..444850ac5b88 --- /dev/null +++ b/examples/src/chains/qa_refine.ts @@ -0,0 +1,36 @@ +import { loadQARefineChain } from "langchain/chains"; +import { OpenAI } from "langchain/llms/openai"; +import { TextLoader } from "langchain/document_loaders/fs/text"; +import { MemoryVectorStore } from "langchain/vectorstores/memory"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; + +export async function run() { + // Create the models and chain + const embeddings = new OpenAIEmbeddings(); + const model = new OpenAI({ temperature: 0 }); + const chain = loadQARefineChain(model); + + // Load the documents and create the vector store + const loader = new TextLoader("./state_of_the_union.txt"); + const docs = await loader.loadAndSplit(); + const store = await MemoryVectorStore.fromDocuments(docs, embeddings); + + // Select the relevant documents + const question = "What did the president say about Justice Breyer"; + const relevantDocs = await store.similaritySearch(question); + + // Call the chain + const res = await chain.call({ + input_documents: relevantDocs, + question, + }); + + console.log(res); + /* + { + output_text: '\n' + + '\n' + + "The president said that Justice Stephen Breyer has dedicated his life to serve this country and thanked him for his service. He also mentioned that Judge Ketanji Brown Jackson will continue Justice Breyer's legacy of excellence, and that the constitutional right affirmed in Roe v. 
Wade—standing precedent for half a century—is under attack as never before. He emphasized the importance of protecting access to health care, preserving a woman's right to choose, and advancing maternal health care in America. He also expressed his support for the LGBTQ+ community, and his commitment to protecting their rights, including offering a Unity Agenda for the Nation to beat the opioid epidemic, increase funding for prevention, treatment, harm reduction, and recovery, and strengthen the Violence Against Women Act." + } + */ +} diff --git a/langchain/src/chains/combine_docs_chain.ts b/langchain/src/chains/combine_docs_chain.ts index 16b4d20a480c..6cdb29649504 100644 --- a/langchain/src/chains/combine_docs_chain.ts +++ b/langchain/src/chains/combine_docs_chain.ts @@ -278,7 +278,7 @@ export class RefineDocumentsChain fields.initialResponseName ?? this.initialResponseName; } - _constructInitialInputs(doc: Document, rest: Record) { + async _constructInitialInputs(doc: Document, rest: Record) { const baseInfo: Record = { page_content: doc.pageContent, ...doc.metadata, @@ -289,7 +289,7 @@ export class RefineDocumentsChain }); const baseInputs: Record = { - [this.documentVariableName]: this.documentPrompt.format({ + [this.documentVariableName]: await this.documentPrompt.format({ ...documentInfo, }), }; @@ -297,7 +297,7 @@ export class RefineDocumentsChain return inputs; } - _constructRefineInputs(doc: Document, res: string) { + async _constructRefineInputs(doc: Document, res: string) { const baseInfo: Record = { page_content: doc.pageContent, ...doc.metadata, @@ -307,7 +307,7 @@ export class RefineDocumentsChain documentInfo[value] = baseInfo[value]; }); const baseInputs: Record = { - [this.documentVariableName]: this.documentPrompt.format({ + [this.documentVariableName]: await this.documentPrompt.format({ ...documentInfo, }), }; @@ -323,13 +323,19 @@ export class RefineDocumentsChain const currentDocs = docs as Document[]; - const initialInputs = 
this._constructInitialInputs(currentDocs[0], rest); + const initialInputs = await this._constructInitialInputs( + currentDocs[0], + rest + ); let res = await this.llmChain.predict({ ...initialInputs }); const refineSteps = [res]; for (let i = 1; i < currentDocs.length; i += 1) { - const refineInputs = this._constructRefineInputs(currentDocs[i], res); + const refineInputs = await this._constructRefineInputs( + currentDocs[i], + res + ); const inputs = { ...refineInputs, ...rest }; res = await this.refineLLMChain.predict({ ...inputs }); refineSteps.push(res); From eb398d4aeff1db34c835f93eeae653243a97c606 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Wed, 12 Apr 2023 17:25:51 +0100 Subject: [PATCH 2/3] Update wording --- docs/docs/modules/chains/index_related_chains/document_qa.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/modules/chains/index_related_chains/document_qa.mdx b/docs/docs/modules/chains/index_related_chains/document_qa.mdx index e8156bb34a6a..4d0233d6835b 100644 --- a/docs/docs/modules/chains/index_related_chains/document_qa.mdx +++ b/docs/docs/modules/chains/index_related_chains/document_qa.mdx @@ -13,7 +13,7 @@ These chains are the building blocks more complex chains for processing unstruct - `StuffDocumentsChain`: This chain is the simplest of the 3 chains and simply injects all documents passed in into the prompt. It then returns the answer to the question, using all documents as context. It is suitable for QA tasks over a small number of documents. - `MapReduceDocumentsChain`: This chain adds a preprocessing step to select relevant portions of each document until the total number of tokens is less than the maximum number of tokens allowed by the model. It then uses the transformed docs as context to answer the question. It is suitable for QA tasks over larger documents. 
-- `RefineDocumentsChain`: This chain iterates over the documents one by one to update a running answer, using the previous intermediate answer and the next doc as context each time. It is suitable for QA tasks over a large number of documents. +- `RefineDocumentsChain`: This chain iterates over the documents one by one to update a running answer, at each turn using the previous version of the answer and the next doc as context. It is suitable for QA tasks over a large number of documents. ## Usage, `StuffDocumentsChain` and `MapReduceDocumentsChain` From a180e71d2aa90bda3186bb5744c8d2615019e589 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Wed, 12 Apr 2023 17:34:57 +0100 Subject: [PATCH 3/3] Fix --- docs/docs/modules/chains/index_related_chains/document_qa.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/modules/chains/index_related_chains/document_qa.mdx b/docs/docs/modules/chains/index_related_chains/document_qa.mdx index 4d0233d6835b..7f129bd844f6 100644 --- a/docs/docs/modules/chains/index_related_chains/document_qa.mdx +++ b/docs/docs/modules/chains/index_related_chains/document_qa.mdx @@ -8,11 +8,11 @@ import CodeBlock from "@theme/CodeBlock"; # Document QA Chains -LangChain provides chains used for processing unstructured text data: `StuffDocumentsChain` and `MapReduceDocumentsChain`. +LangChain provides chains used for processing unstructured text data: `StuffDocumentsChain`, `MapReduceDocumentsChain` and `RefineDocumentsChain`. These chains are the building blocks for more complex chains for processing unstructured text data and receive both documents and a question as input. They then utilize the language model to provide an answer to the question based on the given documents. - `StuffDocumentsChain`: This chain is the simplest of the 3 chains and simply injects all documents passed in into the prompt. It then returns the answer to the question, using all documents as context. 
It is suitable for QA tasks over a small number of documents. -- `MapReduceDocumentsChain`: This chain adds a preprocessing step to select relevant portions of each document until the total number of tokens is less than the maximum number of tokens allowed by the model. It then uses the transformed docs as context to answer the question. It is suitable for QA tasks over larger documents. +- `MapReduceDocumentsChain`: This chain adds a preprocessing step to select relevant portions of each document until the total number of tokens is less than the maximum number of tokens allowed by the model. It then uses the transformed docs as context to answer the question. It is suitable for QA tasks over larger documents, and it runs the preprocessing step in parallel, which can reduce the running time. - `RefineDocumentsChain`: This chain iterates over the documents one by one to update a running answer, at each turn using the previous version of the answer and the next doc as context. It is suitable for QA tasks over a large number of documents. ## Usage, `StuffDocumentsChain` and `MapReduceDocumentsChain`