diff --git a/haystack/database/elasticsearch.py b/haystack/database/elasticsearch.py
index b9884cc54b..b583117c43 100644
--- a/haystack/database/elasticsearch.py
+++ b/haystack/database/elasticsearch.py
@@ -120,5 +120,11 @@ def query(self, query, top_k=10, candidate_doc_ids=None):
         meta_data = []
         for hit in result:
             paragraphs.append(hit["_source"][self.text_field])
-            meta_data.append({"paragraph_id": hit["_id"], "document_id": hit["_source"][self.doc_id_field]})
+            meta_data.append(
+                {
+                    "paragraph_id": hit["_id"],
+                    "document_id": hit["_source"][self.doc_id_field],
+                    "document_name": hit["_source"][self.name_field],
+                }
+            )
         return paragraphs, meta_data
diff --git a/haystack/finder.py b/haystack/finder.py
index e32fa45921..7f8b4f32fa 100644
--- a/haystack/finder.py
+++ b/haystack/finder.py
@@ -36,8 +36,15 @@ def get_answers(self, question, top_k_reader=1, top_k_retriever=10, filters=None
         # 3) Apply reader to get granular answer(s)
         logger.info(f"Applying the reader now to look for the answer in detail ...")
         results = self.reader.predict(question=question,
-                                      paragrahps=paragraphs,
+                                      paragraphs=paragraphs,
                                       meta_data_paragraphs=meta_data,
                                       top_k=top_k_reader)
+        # Add corresponding document_name if an answer contains the document_id (only supported in FARMReader)
+        for ans in results["answers"]:
+            document_name = next(
+                (meta["document_name"] for meta in meta_data if meta["document_id"] == ans["document_id"]), None
+            )
+            ans["document_name"] = document_name
+
         return results
\ No newline at end of file
diff --git a/haystack/reader/farm.py b/haystack/reader/farm.py
index da430a5377..2c5e7cdd95 100644
--- a/haystack/reader/farm.py
+++ b/haystack/reader/farm.py
@@ -190,7 +190,8 @@ def predict(self, question, paragraphs, meta_data_paragraphs=None, top_k=None, m
         for paragraph, meta_data in zip(paragraphs, meta_data_paragraphs):
             cur = {"text": paragraph,
                    "questions": [question],
-                   "document_id": meta_data["document_id"]
+                   "document_id": meta_data["document_id"],
+                   "document_name": meta_data["document_name"],
                    }
             input_dicts.append(cur)
 
diff --git a/haystack/reader/transformers.py b/haystack/reader/transformers.py
index 440f1a6d66..5dd725e224 100644
--- a/haystack/reader/transformers.py
+++ b/haystack/reader/transformers.py
@@ -43,7 +43,7 @@ def __init__(
 
         #TODO param to modify bias for no_answer
 
-    def predict(self, question, paragrahps, meta_data_paragraphs=None, top_k=None):
+    def predict(self, question, paragraphs, meta_data_paragraphs=None, top_k=None):
         """
         Use loaded QA model to find answers for a question in the supplied paragraphs.
 
@@ -76,7 +76,7 @@ def predict(self, question, paragrahps, meta_data_paragraphs=None, top_k=None):
 
         # get top-answers for each candidate passage
         answers = []
-        for p in paragrahps:
+        for p in paragraphs:
             query = {"context": p, "question": question}
             predictions = self.model(query, topk=self.n_best_per_passage)
             # assemble and format all answers