textgen ollama code refactor. (opea-project#1158)
Remove the Ollama folder, since the default OpenAI API can consume the Ollama service; modify the Ollama README and add a UT.
opea-project#998
Signed-off-by: Ye, Xinyu <[email protected]>
XinyuYe-Intel authored and smguggen committed Jan 23, 2025
1 parent a7f08f2 commit b2fb42d
Showing 11 changed files with 83 additions and 118 deletions.
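The refactor rests on Ollama exposing an OpenAI-compatible endpoint that the generic textgen service can consume directly. A minimal sketch of that direct call (not part of this commit; it assumes a local Ollama server on the default port 11434 with the `llama3` model already pulled):

```bash
curl http://localhost:11434/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{"model": "llama3", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```

Since this is the same request shape the textgen microservice already speaks, the dedicated Ollama wrapper under `comps/llms/text-generation/ollama/langchain` becomes redundant.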
4 changes: 0 additions & 4 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -11,10 +11,6 @@ services:
build:
dockerfile: comps/llms/src/text-generation/Dockerfile.intel_hpu
image: ${REGISTRY:-opea}/llm-textgen-gaudi:${TAG:-latest}
llm-ollama:
build:
dockerfile: comps/llms/text-generation/ollama/langchain/Dockerfile
image: ${REGISTRY:-opea}/llm-ollama:${TAG:-latest}
llm-docsum:
build:
dockerfile: comps/llms/src/doc-summarization/Dockerfile
2 changes: 1 addition & 1 deletion comps/finetuning/src/README.md
@@ -244,7 +244,7 @@ curl http://${your_ip}:8015/v1/finetune/list_checkpoints -X POST -H "Content-Typ

### 3.4 Leverage fine-tuned model

After the fine-tuning job is done, the fine-tuned model can be chosen from the listed checkpoints and then used in other microservices. For example, a fine-tuned reranking model can be used in the [reranks](../../rerankings/src/README.md) microservice by assigning its path to the environment variable `RERANK_MODEL_ID`, a fine-tuned embedding model can be used in the [embeddings](../../embeddings/src/README.md) microservice by assigning its path to the environment variable `model`, and LLMs after instruction tuning can be used in the [llms](../../llms/text-generation/README.md) microservice by assigning their path to the environment variable `your_hf_llm_model`.
After the fine-tuning job is done, the fine-tuned model can be chosen from the listed checkpoints and then used in other microservices. For example, a fine-tuned reranking model can be used in the [reranks](../../rerankings/src/README.md) microservice by assigning its path to the environment variable `RERANK_MODEL_ID`, a fine-tuned embedding model can be used in the [embeddings](../../embeddings/src/README.md) microservice by assigning its path to the environment variable `model`, and LLMs after instruction tuning can be used in the [llms](../../llms/src/text-generation/README.md) microservice by assigning their path to the environment variable `your_hf_llm_model`.

## 🚀4. Descriptions for Finetuning parameters

@@ -57,18 +57,18 @@ curl --noproxy "*" http://localhost:11434/api/generate -d '{
## Build Docker Image

```bash
cd GenAIComps/
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
cd ../../../../
docker build -t opea/llm-textgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
```

## Run the Ollama Microservice

```bash
docker run --network host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/llm-ollama:latest
docker run --network host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LLM_ENDPOINT="http://localhost:11434" -e LLM_MODEL_ID="llama3" opea/llm-textgen:latest
```
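Not part of the diff: the run command above assumes an Ollama server is already listening on `http://localhost:11434`. If one is not running natively, the backend can be started in a container first, mirroring what the new test script in this commit does (a sketch; the container name is illustrative):

```bash
docker run -d --name ollama -p 11434:11434 ollama/ollama
docker exec ollama ollama pull llama3
```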

## Consume the Ollama Microservice

```bash
curl http://127.0.0.1:9000/v1/chat/completions -X POST -d '{"model": "llama3", "query":"What is Deep Learning?","max_tokens":32,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"stream":true}' -H 'Content-Type: application/json'
curl http://127.0.0.1:9000/v1/chat/completions -X POST -d '{"messages": [{"role": "user", "content": "What is Deep Learning?"}]}' -H 'Content-Type: application/json'
```
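Not part of the diff: the removed request body carried `"stream":true`. With the OpenAI-style payload, streaming can still be requested via the standard `stream` field (a sketch, assuming the service forwards it unchanged):

```bash
curl http://127.0.0.1:9000/v1/chat/completions \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"messages": [{"role": "user", "content": "What is Deep Learning?"}], "stream": true}'
```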
File renamed without changes.
26 changes: 0 additions & 26 deletions comps/llms/text-generation/ollama/langchain/Dockerfile

This file was deleted.

2 changes: 0 additions & 2 deletions comps/llms/text-generation/ollama/langchain/__init__.py

This file was deleted.

8 changes: 0 additions & 8 deletions comps/llms/text-generation/ollama/langchain/entrypoint.sh

This file was deleted.

60 changes: 0 additions & 60 deletions comps/llms/text-generation/ollama/langchain/llm.py

This file was deleted.

This file was deleted.

12 changes: 0 additions & 12 deletions comps/llms/text-generation/ollama/langchain/requirements.txt

This file was deleted.

78 changes: 78 additions & 0 deletions tests/llms/test_llms_text-generation_service_ollama.sh
@@ -0,0 +1,78 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -x

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
ollama_endpoint_port=11435
llm_port=9000

function build_docker_images() {
    cd $WORKPATH
    docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/llm:comps -f comps/llms/src/text-generation/Dockerfile .
    if [ $? -ne 0 ]; then
        echo "opea/llm build failed"
        exit 1
    else
        echo "opea/llm built successfully"
    fi
}

function start_service() {
    export llm_model=$1
    # Start the Ollama backend and expose it on the test port.
    docker run -d --name="test-comps-llm-ollama-endpoint" -e https_proxy=$https_proxy -p $ollama_endpoint_port:11434 ollama/ollama
    export LLM_ENDPOINT="http://${ip_address}:${ollama_endpoint_port}"

    sleep 5s
    docker exec test-comps-llm-ollama-endpoint ollama pull $llm_model
    sleep 20s

    # Start the textgen microservice pointing at the Ollama endpoint.
    unset http_proxy
    docker run -d --name="test-comps-llm-ollama-server" -p $llm_port:9000 --ipc=host -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LLM_ENDPOINT=$LLM_ENDPOINT -e LLM_MODEL_ID=$llm_model opea/llm:comps
    sleep 20s
}

function validate_microservice() {
    result=$(http_proxy="" curl http://${ip_address}:${llm_port}/v1/chat/completions \
        -X POST \
        -d '{"messages": [{"role": "user", "content": "What is Deep Learning?"}]}' \
        -H 'Content-Type: application/json')
    if [[ $result == *"content"* ]]; then
        echo "Result correct."
    else
        echo "Result wrong. Received was $result"
        docker logs test-comps-llm-ollama-endpoint >> ${LOG_PATH}/llm-ollama.log
        docker logs test-comps-llm-ollama-server >> ${LOG_PATH}/llm-server.log
        exit 1
    fi
}

function stop_docker() {
    cid=$(docker ps -aq --filter "name=test-comps-llm-ollama*")
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {

    stop_docker
    build_docker_images

    pip install --no-cache-dir openai

    llm_models=(
        llama3.2:1b
    )
    for model in "${llm_models[@]}"; do
        start_service "${model}"
        validate_microservice
        stop_docker
    done

    echo y | docker system prune

}

main
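Not part of the file itself: a sketch of a local invocation. The script derives `WORKPATH` from the parent of the current directory and writes logs to `$WORKPATH/tests`, so it is assumed to be launched from the `tests/` directory of a GenAIComps checkout:

```bash
cd GenAIComps/tests
bash llms/test_llms_text-generation_service_ollama.sh
```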
