diff --git a/lazyllm/components/deploy/utils.py b/lazyllm/components/deploy/utils.py
index 58ab580f..9a9d6c95 100644
--- a/lazyllm/components/deploy/utils.py
+++ b/lazyllm/components/deploy/utils.py
@@ -1,5 +1,5 @@
 import os
-import datetime
+from datetime import datetime
 import random
 
 import lazyllm
diff --git a/lazyllm/engine/engine.py b/lazyllm/engine/engine.py
index 618182c7..c2fd80a1 100644
--- a/lazyllm/engine/engine.py
+++ b/lazyllm/engine/engine.py
@@ -478,7 +478,7 @@ def make_shared_llm(llm: str, local: bool = True, prompt: Optional[str] = None,
 def make_online_llm(source: str, base_model: Optional[str] = None, prompt: Optional[str] = None,
                     api_key: Optional[str] = None, secret_key: Optional[str] = None, stream: bool = False,
                     token: Optional[str] = None, base_url: Optional[str] = None):
-    if source.lower() == 'lazyllm':
+    if source and source.lower() == 'lazyllm':
         return make_shared_llm(base_model, False, prompt, token, stream)
     else:
         return lazyllm.OnlineChatModule(base_model, source, base_url, stream,
diff --git a/lazyllm/tools/infer_service/client.py b/lazyllm/tools/infer_service/client.py
index 263ea402..fda762c1 100644
--- a/lazyllm/tools/infer_service/client.py
+++ b/lazyllm/tools/infer_service/client.py
@@ -1,3 +1,4 @@
+import time
 from urllib.parse import urljoin
 import requests
 import lazyllm
@@ -124,3 +125,31 @@ def get_infra_handle(self, token, job_id):
         if not (deployer := getattr(lazyllm.deploy, deploy_method, None)):
             deployer = type(lazyllm.deploy.auto(base_model))
         return lazyllm.TrainableModule(base_model).deploy_method(deployer, url=url)
+
+    def wait_ready(self, token, job_id, timeout=1800):
+        '''
+        Wait until the inference job identified by `job_id` reaches the 'Running' state.
+
+        Parameters:
+        - token (str): Authentication token required to access the job details.
+        - job_id (str): Unique identifier of the job to wait for.
+        - timeout (int): Maximum number of seconds to wait. Defaults to 1800.
+
+        Raises:
+        - RuntimeError: if the job reaches an 'Invalid', 'Cancelled' or 'Failed' state.
+        - TimeoutError: if the job is still not running after `timeout` seconds.
+        '''
+        def get_status():
+            response = requests.get(urljoin(self.url, f'jobs/{job_id}'), headers={'token': token})
+            response.raise_for_status()
+            response = response.json()
+            return self.uniform_status(response['status'])
+
+        n = 0
+        while (status := get_status()) != 'Running':
+            if status in ('Invalid', 'Cancelled', 'Failed'):
+                raise RuntimeError(f'Deploy service failed. status is {status}')
+            if n > timeout:
+                raise TimeoutError(f'Inference service has not started after {timeout} seconds.')
+            time.sleep(10)
+            n += 10
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 522da161..57f1add7 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -3,7 +3,8 @@ docx2txt
 olefile
 pytest-rerunfailures
 pytest-order
-pymilvus>=2.4.7, <2.4.11
+pymilvus==2.4.10
+milvus-lite==2.4.10
 openpyxl
 nbconvert
 python-pptx
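
Reviewer note: a minimal usage sketch of the new wait_ready API, assuming the infer-service client from lazyllm/tools/infer_service/client.py has already been constructed with the service URL. The variable names and the 600-second timeout below are illustrative placeholders, not part of this change:

    # `client` is an instance of the infer-service client patched above;
    # `token` and `job_id` are placeholders returned by an earlier deploy call.
    client.wait_ready(token, job_id, timeout=600)  # polls every 10s until 'Running'

    # Once running, obtain a TrainableModule bound to the deployed endpoint.
    module = client.get_infra_handle(token, job_id)

The polling loop raises RuntimeError for terminal states ('Invalid', 'Cancelled', 'Failed') and TimeoutError once the timeout elapses, so callers can distinguish a failed deployment from a slow one.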