
Commit bef501c

Fix VDMS retrieval issue (opea-project#1252)
* Fix VDMS retrieval issue
Signed-off-by: lvliang-intel <[email protected]>
lvliang-intel authored Feb 13, 2025
1 parent 23b2be2 commit bef501c
Showing 15 changed files with 34 additions and 14 deletions.
2 changes: 1 addition & 1 deletion comps/retrievers/src/Dockerfile
@@ -26,7 +26,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \
     fi && \
     pip install --no-cache-dir torch torchvision ${PIP_EXTRA_INDEX_URL} && \
     pip install --no-cache-dir ${PIP_EXTRA_INDEX_URL} -r /home/user/comps/retrievers/src/requirements.txt && \
-    pip install opentelemetry-api==1.27.0 opentelemetry-exporter-otlp==1.27.0 opentelemetry-sdk==1.27.0
+    pip install opentelemetry-api==1.29.0 opentelemetry-exporter-otlp==1.29.0 opentelemetry-sdk==1.29.0
 
 ENV PYTHONPATH=$PYTHONPATH:/home/user
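[Note] The three opentelemetry packages are typically released and consumed in lockstep, so they are bumped together here. A minimal sanity check one could run inside the built image (the package names come from the Dockerfile; the script itself is illustrative, not part of the commit):

# Verify the pinned opentelemetry packages share one version (illustrative).
from importlib.metadata import version

pkgs = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"]
found = {p: version(p) for p in pkgs}
print(found)  # expected: all "1.29.0" after this commit
assert len(set(found.values())) == 1, f"version skew: {found}"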
2 changes: 1 addition & 1 deletion comps/retrievers/src/integrations/vdms.py
@@ -48,7 +48,7 @@ def _initialize_embedder(self):
             from comps.third_parties.clip.src.clip_embedding import vCLIP
 
             embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 64})
-        if TEI_EMBEDDING_ENDPOINT:
+        elif TEI_EMBEDDING_ENDPOINT:
             # create embeddings using TEI endpoint service
             if logflag:
                 logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}")
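[Note] This one-line `if` → `elif` is the core of the VDMS fix: when the multimodal vCLIP embedder had already been selected, a configured TEI_EMBEDDING_ENDPOINT silently replaced it. A simplified sketch of the branch structure (function and variable names here are placeholders, not the module's real code):

def initialize_embedder(use_vclip, tei_embedding_endpoint=None, embed_model="BAAI/bge-base-en-v1.5"):
    # Simplified stand-in for _initialize_embedder (hypothetical signature).
    if use_vclip:
        embedder = "vCLIP(openai/clip-vit-base-patch32, num_frm=64)"  # multimodal path
    elif tei_embedding_endpoint:  # was `if`: overrode the vCLIP choice above
        embedder = f"TEI embeddings served at {tei_embedding_endpoint}"
    else:
        embedder = f"local HuggingFace embeddings ({embed_model})"
    return embedder

# Before the fix, this returned the TEI embedder even though vCLIP was requested:
print(initialize_embedder(True, tei_embedding_endpoint="http://localhost:8090"))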
1 change: 1 addition & 0 deletions comps/retrievers/src/requirements.txt
@@ -3,6 +3,7 @@ cairosvg
 docarray[full]
 docx2txt
 easyocr
+einops
 fastapi
 future
 graspologic
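[Note] The einops dependency presumably backs the vCLIP video-embedding path exercised by the fix above (tensor rearrangement is einops' usual role in CLIP-style models); that is an inference from the commit contents, not stated in it.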
6 changes: 4 additions & 2 deletions comps/third_parties/pathway/deployment/docker_compose/compose.yaml
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 include:
-  - ../../../tei/deployment/docker_compose/compose.yaml
+  - ../../../tei/deployment/docker_compose/compose.yaml
 
 services:
   pathway-db:
@@ -12,13 +12,15 @@ services:
       - "${PATHWAY_PORT:-6379}:${PATHWAY_PORT:-6379}"
     volumes:
       - "${PATHWAY_VOLUME:-../../src/README.md}:/app/data/README.md"
-    network_mode: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       PATHWAY_HOST: ${PATHWAY_HOST_DB}
       PATHWAY_PORT: ${PATHWAY_PORT}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
     healthcheck:
       test: ["CMD-SHELL", "sleep 30 && exit 0"]
       interval: 1s
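[Note] HUGGINGFACEHUB_API_TOKEN is populated from the single HF_TOKEN secret, and TEI_EMBEDDING_ENDPOINT is now passed into the container; both feed the embedder selection in vectorstore_pathway.py, sketched after that file's diff below.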
4 changes: 2 additions & 2 deletions comps/third_parties/pathway/src/requirements.txt
@@ -1,7 +1,7 @@
 langchain
 langchain-community
-langchain_huggingface
+langchain_openai
+openai
 pathway[xpack-llm]
 sentence-transformers
 tiktoken
 unstructured[all-docs] >= 0.16
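[Note] Dropping langchain_huggingface lines up with the import change in vectorstore_pathway.py below, which stops using HuggingFaceEndpointEmbeddings in favor of HuggingFaceInferenceAPIEmbeddings from langchain-community.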
9 changes: 5 additions & 4 deletions comps/third_parties/pathway/src/vectorstore_pathway.py
@@ -7,8 +7,7 @@
 import nltk
 import pathway as pw
 from langchain import text_splitter
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings
-from langchain_huggingface import HuggingFaceEndpointEmbeddings
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
 from pathway.xpacks.llm.parsers import ParseUnstructured
 from pathway.xpacks.llm.vector_store import VectorStoreServer
@@ -40,15 +39,17 @@
 port = int(os.getenv("PATHWAY_PORT", 8666))
 
 EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")
 
+HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
 tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT")
 
 if __name__ == "__main__":
     # Create vectorstore
     if tei_embedding_endpoint:
         # create embeddings using TEI endpoint service
         logging.info(f"Initializing the embedder from tei_embedding_endpoint: {tei_embedding_endpoint}")
-        embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint)
+        embeddings = HuggingFaceInferenceAPIEmbeddings(
+            api_key=HUGGINGFACEHUB_API_TOKEN, model_name=EMBED_MODEL, api_url=tei_embedding_endpoint
+        )
     else:
         # create embeddings using local embedding model
         embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
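[Note] The new embedder passes the TEI URL, the model name, and the HF token explicitly. A self-contained sketch of the wiring (the endpoint value is a placeholder; the class and parameter names come from the diff, and calling embed_query needs a live TEI server or a local model download):

import os

from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings

EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")
token = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT")  # e.g. "http://localhost:8090" (placeholder)

if endpoint:
    # Remote path: embeddings are computed by the TEI service at api_url.
    embeddings = HuggingFaceInferenceAPIEmbeddings(api_key=token, model_name=EMBED_MODEL, api_url=endpoint)
else:
    # Local fallback: runs the BGE model in-process.
    embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)

print(len(embeddings.embed_query("What is OPEA?")))  # dimension of one embedding vector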
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_elasticsearch.sh
@@ -79,6 +79,8 @@ function validate_microservice() {
 function stop_docker() {
     cd $WORKPATH/comps/retrievers/deployment/docker_compose
     docker compose -f compose.yaml down ${service_name} --remove-orphans
+    cid=$(docker ps -aq --filter "name=elasticsearch-vector-db")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
 }

function main() {
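[Note] `docker compose down` only removes services belonging to the compose project, so helper containers started separately (the vector DBs and TEI servers launched with `docker run` in these tests) survive it; the added `docker ps -aq --filter "name=..."` lookup plus an explicit stop/rm closes that gap. The same two-line cleanup recurs in each test script below.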
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_milvus.sh
@@ -83,6 +83,8 @@ function stop_docker() {
 
     cd $WORKPATH/comps/retrievers/deployment/docker_compose
     docker compose -f compose.yaml down ${service_name} --remove-orphans
+    cid=$(docker ps -aq --filter "name=tei-embedding-serving")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
 }

function main() {
8 changes: 4 additions & 4 deletions tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh
@@ -46,8 +46,8 @@ function start_service() {
     export RETRIEVER_PORT=11635
     export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
     export DATA_PATH="/data2/cache"
-    export MAX_INPUT_TOKENS=1024
-    export MAX_TOTAL_TOKENS=3000
+    export MAX_INPUT_TOKENS=4096
+    export MAX_TOTAL_TOKENS=8192
     export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
     export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
     export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
@@ -67,7 +67,7 @@ function start_service() {
     docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p 6004:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \
         -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$EMBEDDING_MODEL_ID -e LLM_MODEL_ID=$LLM_MODEL_ID -e host_ip=$host_ip -e no_proxy=$no_proxy \
         -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -e NEO4J_USERNAME="neo4j" \
-        -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps
+        -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e MAX_INPUT_LEN=$MAX_INPUT_TOKENS -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps

sleep 1m

@@ -152,7 +152,7 @@ function validate_microservice() {
 }
 
 function stop_docker() {
-    cid=$(docker ps -aq --filter "name=test-comps-*")
+    cid=$(docker ps -aq --filter "name=test-comps-*" --filter "name=neo4j-apoc" --filter "name=tgi-gaudi-server" --filter "name=tei-embedding-serving")
     if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
     cd $WORKPATH/comps/retrievers/deployment/docker_compose
     docker compose -f compose.yaml down ${service_name} --remove-orphans
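[Note] Raising MAX_INPUT_TOKENS/MAX_TOTAL_TOKENS and forwarding MAX_INPUT_LEN into the dataprep container appears intended to keep the extraction limits consistent with what the TGI server accepts (an inference from the variable names; the commit message does not spell this out). Repeated --filter "name=..." flags on docker ps are ORed together, so the widened stop_docker catches every helper container in one pass.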
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_opensearch.sh
@@ -75,6 +75,8 @@ function validate_microservice() {
 function stop_docker() {
     cd $WORKPATH/comps/retrievers/deployment/docker_compose
     docker compose -f compose.yaml down ${service_name} --remove-orphans
+    cid=$(docker ps -aq --filter "name=opensearch-vector-db" --filter "name=tei-embedding-serving")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
 }

function main() {
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_pathway.sh
@@ -69,6 +69,8 @@ function validate_microservice() {
 function stop_docker() {
     cd $WORKPATH/comps/retrievers/deployment/docker_compose
     docker compose -f compose.yaml down ${service_name} --remove-orphans
+    cid=$(docker ps -aq --filter "name=pathway-db")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
 }

function main() {
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_pgvector.sh
@@ -64,6 +64,8 @@ function validate_microservice() {
 function stop_docker() {
     cd $WORKPATH/comps/retrievers/deployment/docker_compose
     docker compose -f compose.yaml down ${service_name} --remove-orphans
+    cid=$(docker ps -aq --filter "name=pgvector-db")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
 }

function main() {
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_qdrant.sh
@@ -59,6 +59,8 @@ function validate_microservice() {
 function stop_docker() {
     cd $WORKPATH/comps/retrievers/deployment/docker_compose
     docker compose -f compose.yaml down ${service_name} --remove-orphans
+    cid=$(docker ps -aq --filter "name=qdrant-vector-db")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
 }

function main() {
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_redis.sh
@@ -131,6 +131,8 @@ function validate_mm_microservice() {
 function stop_docker() {
     cd $WORKPATH/comps/retrievers/deployment/docker_compose
     docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans
+    cid=$(docker ps -aq --filter "name=redis-vector-db")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
 }

function main() {
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_vdms.sh
@@ -78,6 +78,8 @@ function validate_microservice() {
 function stop_docker() {
     cd $WORKPATH/comps/retrievers/deployment/docker_compose
     docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans
+    cid=$(docker ps -aq --filter "name=retriever-vdms*" --filter "name=vdms-vector-db" --filter "name=tei-embedding-serving")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
 }

function main() {
