diff --git a/lazyllm/tools/infer_service/client.py b/lazyllm/tools/infer_service/client.py
index a7f97e601..47e9a870b 100644
--- a/lazyllm/tools/infer_service/client.py
+++ b/lazyllm/tools/infer_service/client.py
@@ -120,9 +120,6 @@ def get_infra_handle(self, token, job_id):
         response.raise_for_status()
         response = response.json()
         base_model, url, deploy_method = response['base_model'], response['url'], response['deploy_method']
-        lazyllm.LOG.warning(base_model)
-        lazyllm.LOG.warning(url)
-        lazyllm.LOG.warning(deploy_method)
         if self.uniform_status(response['status']) != 'Ready':
             raise RuntimeError(f'Job {job_id} is not running now')
         if not (deployer := getattr(lazyllm.deploy, deploy_method, None)):
diff --git a/lazyllm/tools/infer_service/serve.py b/lazyllm/tools/infer_service/serve.py
index 6082c2fd5..e8c262793 100644
--- a/lazyllm/tools/infer_service/serve.py
+++ b/lazyllm/tools/infer_service/serve.py
@@ -40,6 +40,7 @@ def _update_status(self, token, job_id):
         # Ready to Infer
         if Status[status] == Status.Running and m._url:
             update['status'] = 'Ready'
+            update['url'] = m._url
 
         # Some tasks cannot obtain the storage path when they are just started
         if not log_path:
diff --git a/tests/advanced_tests/standard_test/test_engine.py b/tests/advanced_tests/standard_test/test_engine.py
index e433459c9..eba362d46 100644
--- a/tests/advanced_tests/standard_test/test_engine.py
+++ b/tests/advanced_tests/standard_test/test_engine.py
@@ -192,3 +192,26 @@ def test_engine_train_serve(self):
 
         res = engine.local_model_get_training_cost(token, job_id)
         assert res > 15
+
+    def test_engine_infer_server(self):
+        token = '123'
+        engine = LightEngine()
+        engine.launch_localllm_infer_service()
+        jobid, status = engine.deploy_model(token, 'internlm2-chat-7b')
+        engine.infer_client.wait_ready(token, jobid)
+        r = engine.get_infra_handle(token, jobid)
+        assert isinstance(r, lazyllm.TrainableModule) and r._impl._get_deploy_tasks.flag
+        assert '你好' in r('请重复下面一句话:你好')
+
+        nodes = [dict(id='0', kind='SharedLLM', name='m1', args=dict(
+            llm=jobid, local=False, token=token, stream=True, prompt=dict(
+                system='请根据输入帮我计算,不要反问和发挥', user='输入: {query} \n, 答案:')))]
+        gid = engine.start(nodes)
+        assert '2' in engine.run(gid, '1 + 1 = ?')
+
+        engine.stop(gid)
+        nodes = [dict(id='1', kind='OnlineLLM', name='m1', args=dict(
+            source='lazyllm', base_model=jobid, token=token, stream=True, prompt=dict(
+                system='请根据输入帮我计算,不要反问和发挥', user='输入: {query} \n, 答案:')))]
+        gid = engine.start(nodes)
+        assert '2' in engine.run(gid, '1 + 1 = ?')