From 4d30f1440ec299dbbb04a192389f91ec046e99cf Mon Sep 17 00:00:00 2001
From: mwaykole
Date: Wed, 13 Mar 2024 09:38:35 +0530
Subject: [PATCH] adding model-info and tokenize to the TC RHOAIENG-3483

Signed-off-by: mwaykole
---
 .../Resources/Files/llm/model_expected_responses.json         | 8 +++++++-
 .../LLMs/422__model_serving_llm_models.robot                  | 8 ++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/ods_ci/tests/Resources/Files/llm/model_expected_responses.json b/ods_ci/tests/Resources/Files/llm/model_expected_responses.json
index 7de78aec6..9d0b873d8 100644
--- a/ods_ci/tests/Resources/Files/llm/model_expected_responses.json
+++ b/ods_ci/tests/Resources/Files/llm/model_expected_responses.json
@@ -38,7 +38,10 @@
             "llama-2-13b-chat": {
                 "response_tokens": 20,
                 "response_text": "\n\nWater boils at 100 degrees Celsius or 212",
-                "streamed_response_text": "{ 'inputTokenCount': 10}{ 'generatedTokenCount': 2, 'text': '\n'}{ 'generatedTokenCount': 3, 'text': 'I a'}{ 'generatedTokenCount': 4, 'text': 'm no'}{ 'generatedTokenCount': 5, 'text': 't sur'}{ 'generatedTokenCount': 6, 'text': 'e i'}{ 'generatedTokenCount': 7, 'text': 'f thi'}{ 'generatedTokenCount': 8, 'text': 's i'}{ 'generatedTokenCount': 9, 'text': 's th'}{ 'generatedTokenCount': 10, 'text': 'e righ'}{ 'generatedTokenCount': 11, 'text': 't plac'}{ 'generatedTokenCount': 12, 'text': 'e t'}{ 'generatedTokenCount': 13, 'text': 'o as'}{ 'generatedTokenCount': 14, 'text': 'k thi'}{ 'generatedTokenCount': 15, 'text': 's questio'}{ 'generatedTokenCount': 16, 'text': 'n'}{ 'generatedTokenCount': 17, 'text': ', bu'}{ 'generatedTokenCount': 18, 'text': 't '}{ 'generatedTokenCount': 19, 'text': 'I a'}{ 'generatedTokenCount': 20, 'text': 'm trying', 'stopReason': 'MAX_TOKENS'}"
+                "streamed_response_text": "{ 'inputTokenCount': 10}{ 'generatedTokenCount': 2, 'text': '\n'}{ 'generatedTokenCount': 3, 'text': 'I a'}{ 'generatedTokenCount': 4, 'text': 'm no'}{ 'generatedTokenCount': 5, 'text': 't sur'}{ 'generatedTokenCount': 6, 'text': 'e i'}{ 'generatedTokenCount': 7, 'text': 'f thi'}{ 'generatedTokenCount': 8, 'text': 's i'}{ 'generatedTokenCount': 9, 'text': 's th'}{ 'generatedTokenCount': 10, 'text': 'e righ'}{ 'generatedTokenCount': 11, 'text': 't plac'}{ 'generatedTokenCount': 12, 'text': 'e t'}{ 'generatedTokenCount': 13, 'text': 'o as'}{ 'generatedTokenCount': 14, 'text': 'k thi'}{ 'generatedTokenCount': 15, 'text': 's questio'}{ 'generatedTokenCount': 16, 'text': 'n'}{ 'generatedTokenCount': 17, 'text': ', bu'}{ 'generatedTokenCount': 18, 'text': 't '}{ 'generatedTokenCount': 19, 'text': 'I a'}{ 'generatedTokenCount': 20, 'text': 'm trying', 'stopReason': 'MAX_TOKENS'}",
+                "tgis-runtime": {
+                    "tokenize_response_text": "{'responses':[{'tokenCount':9,'tokens':['\\u003cs\\u003e','▁At','▁what','▁temperature','▁does','▁water','▁bo','il','?']}]}"
+                }
             }
         }
     },
@@ -143,6 +146,9 @@
         },
         "mpt-7b-instruct2":{
             "tgis-runtime": "{ 'maxSequenceLength': 2048, 'maxNewTokens': 1024 }"
+        },
+        "llama-2-13b-chat-hf":{
+            "tgis-runtime": "{ 'maxSequenceLength': 4096,'maxNewTokens': 1024}"
         }
     }
 }
diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_models.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_models.robot
index 4c8b9eb78..5d053c076 100644
--- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_models.robot
+++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_models.robot
@@ -345,6 +345,14 @@ Verify User Can Serve And Query A meta-llama/llama-2-13b-chat Model
     ...    inference_type=streaming    n_times=1    protocol=grpc
     ...    namespace=${test_namespace}    query_idx=0    validate_response=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
+    Query Model Multiple Times    model_name=${model_name}    runtime=${TGIS_RUNTIME_NAME}
+    ...    inference_type=model-info    n_times=0
+    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    port_forwarding=${use_port_forwarding}
+    Query Model Multiple Times    model_name=${model_name}    runtime=${TGIS_RUNTIME_NAME}
+    ...    inference_type=tokenize    n_times=0    query_idx=0
+    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    port_forwarding=${use_port_forwarding}
     [Teardown]    Run Keywords
     ...    Clean Up Test Project    test_ns=${test_namespace}
     ...    isvc_names=${models_names}    wait_prj_deletion=${FALSE}