diff --git a/comps/retrievers/src/Dockerfile b/comps/retrievers/src/Dockerfile index 3fb6b3650e..53963d884d 100644 --- a/comps/retrievers/src/Dockerfile +++ b/comps/retrievers/src/Dockerfile @@ -26,7 +26,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi && \ pip install --no-cache-dir torch torchvision ${PIP_EXTRA_INDEX_URL} && \ pip install --no-cache-dir ${PIP_EXTRA_INDEX_URL} -r /home/user/comps/retrievers/src/requirements.txt && \ - pip install opentelemetry-api==1.27.0 opentelemetry-exporter-otlp==1.27.0 opentelemetry-sdk==1.27.0 + pip install opentelemetry-api==1.29.0 opentelemetry-exporter-otlp==1.29.0 opentelemetry-sdk==1.29.0 ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/retrievers/src/integrations/vdms.py b/comps/retrievers/src/integrations/vdms.py index b6a44fdf14..5e5b1731fa 100644 --- a/comps/retrievers/src/integrations/vdms.py +++ b/comps/retrievers/src/integrations/vdms.py @@ -48,7 +48,7 @@ def _initialize_embedder(self): from comps.third_parties.clip.src.clip_embedding import vCLIP embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 64}) - if TEI_EMBEDDING_ENDPOINT: + elif TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service if logflag: logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") diff --git a/comps/retrievers/src/requirements.txt b/comps/retrievers/src/requirements.txt index a04fef1771..511bcc744f 100644 --- a/comps/retrievers/src/requirements.txt +++ b/comps/retrievers/src/requirements.txt @@ -3,6 +3,7 @@ cairosvg docarray[full] docx2txt easyocr +einops fastapi future graspologic diff --git a/comps/third_parties/pathway/deployment/docker_compose/compose.yaml b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml index 35dc90c32e..9c1ead2b94 100644 --- a/comps/third_parties/pathway/deployment/docker_compose/compose.yaml +++ b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 include: - - ../../../tei/deployment/docker_compose/compose.yaml + - ../../../tei/deployment/docker_compose/compose.yaml services: pathway-db: @@ -12,13 +12,15 @@ services: - "${PATHWAY_PORT:-6379}:${PATHWAY_PORT:-6379}" volumes: - "${PATHWAY_VOLUME:-../../src/README.md}:/app/data/README.md" - network_mode: host environment: + no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} PATHWAY_HOST: ${PATHWAY_HOST_DB} PATHWAY_PORT: ${PATHWAY_PORT} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HF_TOKEN: ${HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "sleep 30 && exit 0"] interval: 1s diff --git a/comps/third_parties/pathway/src/requirements.txt b/comps/third_parties/pathway/src/requirements.txt index ef1bcb44bc..e552e247ff 100644 --- a/comps/third_parties/pathway/src/requirements.txt +++ b/comps/third_parties/pathway/src/requirements.txt @@ -1,7 +1,7 @@ langchain langchain-community -langchain_huggingface -langchain_openai +openai pathway[xpack-llm] sentence-transformers +tiktoken unstructured[all-docs] >= 0.16 diff --git a/comps/third_parties/pathway/src/vectorstore_pathway.py b/comps/third_parties/pathway/src/vectorstore_pathway.py index 22a23a2414..1b9d207edb 100644 --- a/comps/third_parties/pathway/src/vectorstore_pathway.py +++ b/comps/third_parties/pathway/src/vectorstore_pathway.py @@ -7,8 +7,7 @@ import nltk import pathway as pw from langchain import text_splitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_huggingface import HuggingFaceEndpointEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from pathway.xpacks.llm.parsers import ParseUnstructured from pathway.xpacks.llm.vector_store import VectorStoreServer @@ -40,7 +39,7 @@ port = int(os.getenv("PATHWAY_PORT", 8666)) EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") if __name__ == "__main__": @@ -48,7 +47,9 @@ if tei_embedding_endpoint: # create embeddings using TEI endpoint service logging.info(f"Initializing the embedder from tei_embedding_endpoint: {tei_embedding_endpoint}") - embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=EMBED_MODEL, api_url=tei_embedding_endpoint + ) else: # create embeddings using local embedding model embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/tests/retrievers/test_retrievers_elasticsearch.sh b/tests/retrievers/test_retrievers_elasticsearch.sh index 60996a44ec..a5fd53fb72 100644 --- a/tests/retrievers/test_retrievers_elasticsearch.sh +++ b/tests/retrievers/test_retrievers_elasticsearch.sh @@ -79,6 +79,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=elasticsearch-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_milvus.sh b/tests/retrievers/test_retrievers_milvus.sh index 507f43c5af..05bd69601a 100644 --- a/tests/retrievers/test_retrievers_milvus.sh +++ b/tests/retrievers/test_retrievers_milvus.sh @@ -83,6 +83,8 @@ function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=tei-embedding-serving") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh index f6857f35cb..a819e2e485 100644 --- a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh +++ b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh @@ -46,8 +46,8 @@ function start_service() { export RETRIEVER_PORT=11635 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export DATA_PATH="/data2/cache" - export MAX_INPUT_TOKENS=1024 - export MAX_TOTAL_TOKENS=3000 + export MAX_INPUT_TOKENS=4096 + export MAX_TOTAL_TOKENS=8192 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" @@ -67,7 +67,7 @@ function start_service() { docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p 6004:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$EMBEDDING_MODEL_ID -e LLM_MODEL_ID=$LLM_MODEL_ID -e host_ip=$host_ip -e no_proxy=$no_proxy \ -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -e NEO4J_USERNAME="neo4j" \ - -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps + -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e MAX_INPUT_LEN=$MAX_INPUT_TOKENS -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps sleep 1m @@ -152,7 +152,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-*") + cid=$(docker ps -aq --filter "name=test-comps-*" --filter "name=neo4j-apoc" --filter "name=tgi-gaudi-server" --filter "name=tei-embedding-serving") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans diff --git a/tests/retrievers/test_retrievers_opensearch.sh b/tests/retrievers/test_retrievers_opensearch.sh index 7a5fc0aeb2..ae49c41a90 100644 --- a/tests/retrievers/test_retrievers_opensearch.sh +++ b/tests/retrievers/test_retrievers_opensearch.sh @@ -75,6 +75,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=opensearch-vector-db" --filter "name=tei-embedding-serving") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_pathway.sh b/tests/retrievers/test_retrievers_pathway.sh index 86fadaa812..3dbc2bb301 100644 --- a/tests/retrievers/test_retrievers_pathway.sh +++ b/tests/retrievers/test_retrievers_pathway.sh @@ -69,6 +69,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=pathway-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_pgvector.sh b/tests/retrievers/test_retrievers_pgvector.sh index 021d81a0c2..2a51a3e91d 100644 --- a/tests/retrievers/test_retrievers_pgvector.sh +++ b/tests/retrievers/test_retrievers_pgvector.sh @@ -64,6 +64,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=pgvector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_qdrant.sh b/tests/retrievers/test_retrievers_qdrant.sh index da2d343ffc..e50642ac0b 100644 --- a/tests/retrievers/test_retrievers_qdrant.sh +++ b/tests/retrievers/test_retrievers_qdrant.sh @@ -59,6 +59,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=qdrant-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_redis.sh b/tests/retrievers/test_retrievers_redis.sh index 0964049f98..aa2bbe61fc 100644 --- a/tests/retrievers/test_retrievers_redis.sh +++ b/tests/retrievers/test_retrievers_redis.sh @@ -131,6 +131,8 @@ function validate_mm_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans + cid=$(docker ps -aq --filter "name=redis-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_vdms.sh b/tests/retrievers/test_retrievers_vdms.sh index cd2b41b53e..732fd91134 100644 --- a/tests/retrievers/test_retrievers_vdms.sh +++ b/tests/retrievers/test_retrievers_vdms.sh @@ -78,6 +78,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans + cid=$(docker ps -aq --filter "name=retriever-vdms*" --filter "name=vdms-vector-db" --filter "name=tei-embedding-serving") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() {