From c3c8497347aefe5206efaae61f0fa1554537f914 Mon Sep 17 00:00:00 2001 From: Letong Han <106566639+letonghan@users.noreply.github.com> Date: Fri, 14 Feb 2025 22:29:38 +0800 Subject: [PATCH] Fix Qdrant retriever RAG issue. (#1289) * Fix Qdrant retriever no retrieved result issue. Signed-off-by: letonghan --- .../deployment/docker_compose/compose.yaml | 2 +- comps/retrievers/src/integrations/qdrant.py | 19 ++++++++++++++----- .../src/opea_retrievers_microservice.py | 3 ++- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/comps/retrievers/deployment/docker_compose/compose.yaml b/comps/retrievers/deployment/docker_compose/compose.yaml index 2d82f6570c..a3309d7d6f 100644 --- a/comps/retrievers/deployment/docker_compose/compose.yaml +++ b/comps/retrievers/deployment/docker_compose/compose.yaml @@ -131,7 +131,7 @@ services: RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_QDRANT} QDRANT_HOST: ${QDRANT_HOST} QDRANT_PORT: ${QDRANT_PORT} - INDEX_NAME: ${INDEX_NAME} + QDRANT_INDEX_NAME: ${INDEX_NAME} depends_on: qdrant-vector-db: condition: service_healthy diff --git a/comps/retrievers/src/integrations/qdrant.py b/comps/retrievers/src/integrations/qdrant.py index 202dcb93c0..aadee8621f 100644 --- a/comps/retrievers/src/integrations/qdrant.py +++ b/comps/retrievers/src/integrations/qdrant.py @@ -3,6 +3,7 @@ import os +from types import SimpleNamespace from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever from haystack_integrations.document_stores.qdrant import QdrantDocumentStore @@ -26,7 +27,7 @@ class OpeaQDrantRetriever(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) - self.retriever = self._initialize_client() + self.db_store, self.retriever = self._initialize_client() health_status = self.check_health() if not health_status: logger.error("OpeaQDrantRetriever health check failed.") @@ -43,7 +44,7 @@ def _initialize_client(self) -> QdrantEmbeddingRetriever: retriever = QdrantEmbeddingRetriever(document_store=qdrant_store) - return retriever + return qdrant_store, retriever def check_health(self) -> bool: """Checks the health of the retriever service. @@ -55,7 +56,7 @@ def check_health(self) -> bool: logger.info("[ check health ] start to check health of QDrant") try: # Check the status of the QDrant service - _ = self.retriever.client + _ = self.db_store.client logger.info("[ check health ] Successfully connected to QDrant!") return True except Exception as e: @@ -75,6 +76,14 @@ async def invoke(self, input: EmbedDoc) -> list: search_res = self.retriever.run(query_embedding=input.embedding)["documents"] + # format result to align with the standard output in opea_retrievers_microservice.py + final_res = [] + for res in search_res: + dict_res = res.meta + res_obj = SimpleNamespace(**dict_res) + final_res.append(res_obj) + if logflag: - logger.info(f"[ similarity search ] search result: {search_res}") - return search_res + logger.info(f"[ similarity search ] search result: {final_res}") + + return final_res diff --git a/comps/retrievers/src/opea_retrievers_microservice.py b/comps/retrievers/src/opea_retrievers_microservice.py index d1d8624f49..4980cad0f0 100644 --- a/comps/retrievers/src/opea_retrievers_microservice.py +++ b/comps/retrievers/src/opea_retrievers_microservice.py @@ -81,7 +81,8 @@ async def ingest_files( r.metadata["b64_img_str"] = [input.base64_image, r.metadata["b64_img_str"]] else: r.metadata["b64_img_str"] = input.base64_image - metadata_list.append(r.metadata) + if r.metadata: + metadata_list.append(r.metadata) retrieved_docs.append(TextDoc(text=r.page_content)) result = SearchedMultimodalDoc( retrieved_docs=retrieved_docs, initial_query=input.text, metadata=metadata_list