From 4e61a5132ec4b0b2a2e036ffb6d1462c1c493b3e Mon Sep 17 00:00:00 2001 From: bdattoma Date: Mon, 19 Feb 2024 17:06:42 +0100 Subject: [PATCH 01/16] add TGIS suite with initial adjustments --- .../LLMs/422__model_serving_llm _tgis.robot | 542 ++++++++++++++++++ 1 file changed, 542 insertions(+) create mode 100644 ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot new file mode 100644 index 000000000..c94622146 --- /dev/null +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot @@ -0,0 +1,542 @@ +*** Settings *** +Documentation Collection of CLI tests to validate the model serving stack for Large Language Models (LLM). +... These tests leverage on TGIS Standalone Serving Runtime +Resource ../../../../Resources/OCP.resource +Resource ../../../../Resources/CLI/ModelServing/llm.resource +Library OpenShiftLibrary +Suite Setup Suite Setup +Suite Teardown RHOSi Teardown +Test Tags KServe + + +*** Variables *** +${FLAN_MODEL_S3_DIR}= flan-t5-small/flan-t5-small-hf +${FLAN_GRAMMAR_MODEL_S3_DIR}= flan-t5-large-grammar-synthesis-caikit/flan-t5-large-grammar-synthesis-caikit +${FLAN_LARGE_MODEL_S3_DIR}= flan-t5-large/flan-t5-large +${BLOOM_MODEL_S3_DIR}= bloom-560m/bloom-560m-caikit +${FLAN_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_MODEL_S3_DIR} +${FLAN_GRAMMAR_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_GRAMMAR_MODEL_S3_DIR}/artifacts +${FLAN_LARGE_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_LARGE_MODEL_S3_DIR}/artifacts +${BLOOM_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${BLOOM_MODEL_S3_DIR}/artifacts +${TEST_NS}= tgis-standalone2 +${TGIS_RUNTIME_NAME}= tgis-runtime + + +*** Test Cases *** +Verify User Can Serve And Query A Model + [Documentation] Basic tests for preparing, 
deploying and querying a LLM model + ... using Kserve and Caikit+TGIS runtime + [Tags] Sanity Tier1 ODS-2341 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-cli + ${test_namespace}= Set Variable ${TEST_NS}-cli + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=streaming n_times=1 + ... namespace=${test_namespace} validate_response=${FALSE} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Deploy Multiple Models In The Same Namespace + [Documentation] Checks if user can deploy and query multiple models in the same namespace + [Tags] Sanity Tier1 ODS-2371 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-multisame + ${test_namespace}= Set Variable ${TEST_NS}-multisame + ${model_one_name}= Set Variable bloom-560m-caikit + ${model_two_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${model_one_name} ${model_two_name} + Compile Inference Service YAML isvc_name=${model_one_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${BLOOM_STORAGE_URI} + ... 
model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Compile Inference Service YAML isvc_name=${model_two_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_one_name} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_two_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${model_one_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=5 namespace=${test_namespace} + Query Model Multiple Times model_name=${model_two_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=10 namespace=${test_namespace} + Query Model Multiple Times model_name=${model_one_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=5 namespace=${test_namespace} + Query Model Multiple Times model_name=${model_two_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=10 namespace=${test_namespace} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Deploy Multiple Models In Different Namespaces + [Documentation] Checks if user can deploy and query multiple models in the different namespaces + [Tags] Sanity Tier1 ODS-2378 + [Setup] Run Keywords Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-multi1 + ... AND + ... 
Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-multi2 + ${model_one_name}= Set Variable bloom-560m-caikit + ${model_two_name}= Set Variable flan-t5-small-caikit + ${models_names_ns_1}= Create List ${model_one_name} + ${models_names_ns_2}= Create List ${model_two_name} + Compile Inference Service YAML isvc_name=${model_one_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${BLOOM_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=singlemodel-multi1 + Compile Inference Service YAML isvc_name=${model_two_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=singlemodel-multi2 + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_one_name} + ... namespace=singlemodel-multi1 + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_two_name} + ... namespace=singlemodel-multi2 + Query Model Multiple Times model_name=${model_one_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=2 namespace=singlemodel-multi1 + Query Model Multiple Times model_name=${model_two_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=2 namespace=singlemodel-multi2 + [Teardown] Run Keywords Clean Up Test Project test_ns=singlemodel-multi1 isvc_names=${models_names_ns_1} + ... wait_prj_deletion=${FALSE} + ... AND + ... Clean Up Test Project test_ns=singlemodel-multi2 isvc_names=${models_names_ns_2} + ... wait_prj_deletion=${FALSE} + +Verify Model Upgrade Using Canaray Rollout + [Documentation] Checks if user can apply Canary Rollout as deployment strategy + [Tags] Sanity Tier1 ODS-2372 + ... 
AutomationBug + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=canary-model-upgrade + ${test_namespace}= Set Variable canary-model-upgrade + ${isvc_name}= Set Variable canary-caikit + ${model_name}= Set Variable flan-t5-small-caikit + ${isvcs_names}= Create List ${isvc_name} + ${canary_percentage}= Set Variable ${30} + Compile Deploy And Query LLM model isvc_name=${isvc_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_name=${model_name} + ... namespace=${test_namespace} + ... validate_response=${FALSE} + Log To Console Applying Canary Traffic for Model Upgrade + ${model_name}= Set Variable bloom-560m-caikit + Compile Deploy And Query LLM model isvc_name=${isvc_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${BLOOM_STORAGE_URI} + ... model_name=${model_name} + ... canaryTrafficPercent=${canary_percentage} + ... namespace=${test_namespace} + ... validate_response=${FALSE} + ... n_queries=${0} + Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} + ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + Log To Console Remove Canary Traffic For Model Upgrade + Compile Deploy And Query LLM model isvc_name=${isvc_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_name=${model_name} + ... model_storage_uri=${BLOOM_STORAGE_URI} + ... namespace=${test_namespace} + Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} + ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE} + +Verify Model Pods Are Deleted When No Inference Service Is Present + [Documentation] Checks if model pods get successfully deleted after + ... 
deleting the KServe InferenceService object + [Tags] Tier2 ODS-2373 AutomationBug + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=no-infer-kserve + ${flan_isvc_name}= Set Variable flan-t5-small-caikit + ${model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${model_name} + Compile Deploy And Query LLM model isvc_name=${flan_isvc_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_name=${model_name} + ... namespace=no-infer-kserve + Delete InfereceService isvc_name=${flan_isvc_name} namespace=no-infer-kserve + ${rc} ${out}= Run And Return Rc And Output oc wait pod -l serving.kserve.io/inferenceservice=${flan_isvc_name} -n no-infer-kserve --for=delete --timeout=200s + Should Be Equal As Integers ${rc} ${0} + [Teardown] Clean Up Test Project test_ns=no-infer-kserve + ... isvc_names=${models_names} isvc_delete=${FALSE} + ... wait_prj_deletion=${FALSE} + +Verify User Can Change The Minimum Number Of Replicas For A Model + [Documentation] Checks if user can change the minimum number of replicas + ... of a deployed model. + ... Affected by: https://issues.redhat.com/browse/SRVKS-1175 + [Tags] Sanity Tier1 ODS-2376 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-reps + ${test_namespace}= Set Variable ${TEST_NS}-reps + ${model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${model_name} + Compile Inference Service YAML isvc_name=${model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... min_replicas=1 + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} + ... 
namespace=${test_namespace} exp_replicas=1 + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} n_times=3 + ... namespace=${test_namespace} + ${rev_id}= Set Minimum Replicas Number n_replicas=3 model_name=${model_name} + ... namespace=${test_namespace} + Wait For Pods To Be Terminated label_selector=serving.knative.dev/revisionUID=${rev_id} + ... namespace=${test_namespace} timeout=360s + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} exp_replicas=3 + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} n_times=3 + ... namespace=${test_namespace} + ${rev_id}= Set Minimum Replicas Number n_replicas=1 model_name=${model_name} + ... namespace=${test_namespace} + Wait For Pods To Be Terminated label_selector=serving.knative.dev/revisionUID=${rev_id} + ... namespace=${test_namespace} timeout=360s + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} exp_replicas=1 + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} n_times=3 + ... namespace=${test_namespace} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Autoscale Using Concurrency + [Documentation] Checks if model successfully scale up based on concurrency metrics (KPA) + [Tags] Sanity Tier1 ODS-2377 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=autoscale-con + ${test_namespace}= Set Variable autoscale-con + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... auto_scale=True + ... 
model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} n_times=10 + ... namespace=${test_namespace} validate_response=${FALSE} background=${TRUE} + Wait For Pods Number number=1 comparison=GREATER THAN + ... label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Validate Scale To Zero + [Documentation] Checks if model successfully scale down to 0 if there's no traffic + [Tags] Sanity Tier1 ODS-2379 AutomationBug + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=autoscale-zero + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=autoscale-zero + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=autoscale-zero + ${host}= Get KServe Inference Host Via CLI isvc_name=${flan_model_name} namespace=autoscale-zero + ${body}= Set Variable '{"text": "At what temperature does liquid Nitrogen boil?"}' + ${header}= Set Variable 'mm-model-id: ${flan_model_name}' + Query Model With GRPCURL host=${host} port=443 + ... 
endpoint="caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict" + ... json_body=${body} json_header=${header} + ... insecure=${TRUE} + Set Minimum Replicas Number n_replicas=0 model_name=${flan_model_name} + ... namespace=autoscale-zero + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=autoscale-zero + Wait For Pods To Be Terminated label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=autoscale-zero + Query Model With GRPCURL host=${host} port=443 + ... endpoint="caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict" + ... json_body=${body} json_header=${header} + ... insecure=${TRUE} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=autoscale-zero + Wait For Pods To Be Terminated label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=autoscale-zero + [Teardown] Clean Up Test Project test_ns=autoscale-zero + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Set Requests And Limits For A Model + [Documentation] Checks if user can set HW request and limits on their inference service object + [Tags] Sanity Tier1 ODS-2380 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=hw-res + ${test_namespace}= Set Variable hw-res + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + ${requests}= Create Dictionary cpu=1 memory=2Gi + ${limits}= Create Dictionary cpu=2 memory=4Gi + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... requests_dict=${requests} limits_dict=${limits} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... 
namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + ${rev_id}= Get Current Revision ID model_name=${flan_model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} n_times=1 + ... namespace=${test_namespace} + Container Hardware Resources Should Match Expected container_name=kserve-container + ... pod_label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} exp_requests=${requests} exp_limits=${limits} + ${new_requests}= Create Dictionary cpu=2 memory=3Gi + Set Model Hardware Resources model_name=${flan_model_name} namespace=hw-res + ... requests=${new_requests} limits=${NONE} + Wait For Pods To Be Terminated label_selector=serving.knative.dev/revisionUID=${rev_id} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} exp_replicas=1 + Container Hardware Resources Should Match Expected container_name=kserve-container + ... pod_label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} exp_requests=${new_requests} exp_limits=${NONE} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify Model Can Be Served And Query On A GPU Node + [Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node + ... 
using Kserve and Caikit+TGIS runtime + [Tags] Sanity Tier1 ODS-2381 Resources-GPU + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-gpu + ${test_namespace}= Set Variable singlemodel-gpu + ${model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${model_name} + ${requests}= Create Dictionary nvidia.com/gpu=1 + ${limits}= Create Dictionary nvidia.com/gpu=1 + Compile Inference Service YAML isvc_name=${model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... requests_dict=${requests} limits_dict=${limits} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} + Container Hardware Resources Should Match Expected container_name=kserve-container + ... pod_label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} exp_requests=${requests} exp_limits=${limits} + Model Pod Should Be Scheduled On A GPU Node label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} n_times=10 + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} n_times=5 + ... namespace=${test_namespace} inference_type=streaming validate_response=${FALSE} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${model_name} wait_prj_deletion=${FALSE} + +Verify Non Admin Can Serve And Query A Model + [Documentation] Basic tests leveraging on a non-admin user for preparing, deploying and querying a LLM model + ... 
using Kserve and Caikit+TGIS runtime + [Tags] Sanity Tier1 ODS-2326 + [Setup] Run Keywords Login To OCP Using API ${TEST_USER_3.USERNAME} ${TEST_USER_3.PASSWORD} AND + ... Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=non-admin-test + ${test_namespace}= Set Variable non-admin-test + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + ${host}= Get KServe Inference Host Via CLI isvc_name=${flan_model_name} namespace=${test_namespace} + ${body}= Set Variable '{"text": "${EXP_RESPONSES}[queries][0][query_text]"}' + ${header}= Set Variable 'mm-model-id: ${flan_model_name}' + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=streaming n_times=1 validate_response=${FALSE} + ... namespace=${test_namespace} + [Teardown] Run Keywords Login To OCP Using API ${OCP_ADMIN_USER.USERNAME} ${OCP_ADMIN_USER.PASSWORD} AND + ... Clean Up Test Project test_ns=${test_namespace} isvc_names=${models_names} + ... 
wait_prj_deletion=${FALSE} + +Verify User Can Serve And Query Flan-t5 Grammar Syntax Corrector + [Documentation] Deploys and queries flan-t5-large-grammar-synthesis model + [Tags] Tier2 ODS-2441 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=grammar-model + ${test_namespace}= Set Variable grammar-model + ${flan_model_name}= Set Variable flan-t5-large-grammar-synthesis-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_GRAMMAR_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} query_idx=1 + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=streaming n_times=1 validate_response=${FALSE} + ... namespace=${test_namespace} query_idx=${1} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Serve And Query Flan-t5 Large + [Documentation] Deploys and queries flan-t5-large model + [Tags] Tier2 ODS-2434 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=flan-t5-large3 + ${test_namespace}= Set Variable flan-t5-large3 + ${flan_model_name}= Set Variable flan-t5-large + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_LARGE_STORAGE_URI} + ... 
model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} query_idx=${0} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=streaming n_times=1 validate_response=${FALSE} + ... namespace=${test_namespace} query_idx=${0} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify Runtime Upgrade Does Not Affect Deployed Models + [Documentation] Upgrades the caikit runtime in the same NS where a model + ... is already deployed. The expectation is that the current model + ... must remain unchanged after the runtime upgrade. + ... ATTENTION: this is an approximation of the runtime upgrade scenario, however + ... the real case scenario will be defined once RHODS actually ships the Caikit runtime. + [Tags] Sanity Tier1 ODS-2404 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-up + ${test_namespace}= Set Variable ${TEST_NS}-up + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... 
namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} + ${created_at} ${caikitsha}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} + ... namespace=${test_namespace} + Upgrade Caikit Runtime Image new_image_url=quay.io/opendatahub/caikit-tgis-serving:stable + ... namespace=${test_namespace} + Sleep 5s reason=Sleep, in case the runtime upgrade takes some time to start performing actions on the pods... + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} exp_replicas=1 + ${created_at_after} ${caikitsha_after}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} + ... namespace=${test_namespace} + Should Be Equal ${created_at} ${created_at_after} + Should Be Equal As Strings ${caikitsha} ${caikitsha_after} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Access Model Metrics From UWM + [Documentation] Verifies that model metrics are available for users in the + ... OpenShift monitoring system (UserWorkloadMonitoring) + ... PARTIALLY DONE: it is checking number of requests, number of successful requests + ... and model pod cpu usage. Waiting for a complete list of expected metrics and + ... derived metrics. + [Tags] Sanity Tier1 ODS-2401 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-metrics enable_metrics=${TRUE} + ${test_namespace}= Set Variable singlemodel-metrics + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + ${thanos_url}= Get OpenShift Thanos URL + ${token}= Generate Thanos Token + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... 
model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Wait Until Keyword Succeeds 30 times 4s + ... TGI Caikit And Istio Metrics Should Exist thanos_url=${thanos_url} thanos_token=${token} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=3 + ... namespace=${test_namespace} + Wait Until Keyword Succeeds 50 times 5s + ... User Can Fetch Number Of Requests Over Defined Time thanos_url=${thanos_url} thanos_token=${token} + ... model_name=${flan_model_name} query_kind=single namespace=${test_namespace} period=5m exp_value=3 + Wait Until Keyword Succeeds 20 times 5s + ... User Can Fetch Number Of Successful Requests Over Defined Time thanos_url=${thanos_url} thanos_token=${token} + ... model_name=${flan_model_name} namespace=${test_namespace} period=5m exp_value=3 + Wait Until Keyword Succeeds 20 times 5s + ... User Can Fetch CPU Utilization thanos_url=${thanos_url} thanos_token=${token} + ... model_name=${flan_model_name} namespace=${test_namespace} period=5m + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=streaming n_times=1 validate_response=${FALSE} + ... namespace=${test_namespace} query_idx=${0} + Wait Until Keyword Succeeds 30 times 5s + ... User Can Fetch Number Of Requests Over Defined Time thanos_url=${thanos_url} thanos_token=${token} + ... model_name=${flan_model_name} query_kind=stream namespace=${test_namespace} period=5m exp_value=1 + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... 
isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Query A Model Using HTTP Calls + [Documentation] From RHOAI 2.5 HTTP is allowed and default querying protocol. + ... This tests deploys the runtime enabling HTTP port and send queries to the model + [Tags] ODS-2501 Sanity Tier1 ProductBug + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=kserve-http protocol=http + ${test_namespace}= Set Variable kserve-http + ${model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${model_name} + Compile Inference Service YAML isvc_name=${model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} protocol=http + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} query_idx=${0} + # temporarily disabling stream response validation. Need to re-design the expected response json file + # because format of streamed response with http is slightly different from grpc + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} protocol=http + ... inference_type=streaming n_times=1 validate_response=${FALSE} + ... namespace=${test_namespace} query_idx=${0} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... 
isvc_names=${models_names} wait_prj_deletion=${FALSE} + + +*** Keywords *** +Suite Setup + [Documentation] + Skip If Component Is Not Enabled kserve + RHOSi Setup + Load Expected Responses \ No newline at end of file From 18ea71f5b6526654eee5d0fab158252ef384d082 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Mon, 19 Feb 2024 17:09:44 +0100 Subject: [PATCH 02/16] remove from sanity --- .../LLMs/422__model_serving_llm _tgis.robot | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot index c94622146..a8e76ad21 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot @@ -26,7 +26,7 @@ ${TGIS_RUNTIME_NAME}= tgis-runtime Verify User Can Serve And Query A Model [Documentation] Basic tests for preparing, deploying and querying a LLM model ... 
using Kserve and Caikit+TGIS runtime - [Tags] Sanity Tier1 ODS-2341 + [Tags] Tier1 ODS-2341 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-cli ${test_namespace}= Set Variable ${TEST_NS}-cli ${flan_model_name}= Set Variable flan-t5-small-caikit @@ -50,7 +50,7 @@ Verify User Can Serve And Query A Model Verify User Can Deploy Multiple Models In The Same Namespace [Documentation] Checks if user can deploy and query multiple models in the same namespace - [Tags] Sanity Tier1 ODS-2371 + [Tags] Tier1 ODS-2371 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-multisame ${test_namespace}= Set Variable ${TEST_NS}-multisame ${model_one_name}= Set Variable bloom-560m-caikit @@ -84,7 +84,7 @@ Verify User Can Deploy Multiple Models In The Same Namespace Verify User Can Deploy Multiple Models In Different Namespaces [Documentation] Checks if user can deploy and query multiple models in the different namespaces - [Tags] Sanity Tier1 ODS-2378 + [Tags] Tier1 ODS-2378 [Setup] Run Keywords Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-multi1 ... AND ... Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-multi2 @@ -119,7 +119,7 @@ Verify User Can Deploy Multiple Models In Different Namespaces Verify Model Upgrade Using Canaray Rollout [Documentation] Checks if user can apply Canary Rollout as deployment strategy - [Tags] Sanity Tier1 ODS-2372 + [Tags] Tier1 ODS-2372 ... AutomationBug [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=canary-model-upgrade ${test_namespace}= Set Variable canary-model-upgrade @@ -180,7 +180,7 @@ Verify User Can Change The Minimum Number Of Replicas For A Model [Documentation] Checks if user can change the minimum number of replicas ... of a deployed model. ... 
Affected by: https://issues.redhat.com/browse/SRVKS-1175 - [Tags] Sanity Tier1 ODS-2376 + [Tags] Tier1 ODS-2376 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-reps ${test_namespace}= Set Variable ${TEST_NS}-reps ${model_name}= Set Variable flan-t5-small-caikit @@ -217,7 +217,7 @@ Verify User Can Change The Minimum Number Of Replicas For A Model Verify User Can Autoscale Using Concurrency [Documentation] Checks if model successfully scale up based on concurrency metrics (KPA) - [Tags] Sanity Tier1 ODS-2377 + [Tags] Tier1 ODS-2377 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=autoscale-con ${test_namespace}= Set Variable autoscale-con ${flan_model_name}= Set Variable flan-t5-small-caikit @@ -243,7 +243,7 @@ Verify User Can Autoscale Using Concurrency Verify User Can Validate Scale To Zero [Documentation] Checks if model successfully scale down to 0 if there's no traffic - [Tags] Sanity Tier1 ODS-2379 AutomationBug + [Tags] Tier1 ODS-2379 AutomationBug [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=autoscale-zero ${flan_model_name}= Set Variable flan-t5-small-caikit ${models_names}= Create List ${flan_model_name} @@ -281,7 +281,7 @@ Verify User Can Validate Scale To Zero Verify User Can Set Requests And Limits For A Model [Documentation] Checks if user can set HW request and limits on their inference service object - [Tags] Sanity Tier1 ODS-2380 + [Tags] Tier1 ODS-2380 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=hw-res ${test_namespace}= Set Variable hw-res ${flan_model_name}= Set Variable flan-t5-small-caikit @@ -320,7 +320,7 @@ Verify User Can Set Requests And Limits For A Model Verify Model Can Be Served And Query On A GPU Node [Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node ... 
using Kserve and Caikit+TGIS runtime - [Tags] Sanity Tier1 ODS-2381 Resources-GPU + [Tags] Tier1 ODS-2381 Resources-GPU [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-gpu ${test_namespace}= Set Variable singlemodel-gpu ${model_name}= Set Variable flan-t5-small-caikit @@ -351,7 +351,7 @@ Verify Model Can Be Served And Query On A GPU Node Verify Non Admin Can Serve And Query A Model [Documentation] Basic tests leveraging on a non-admin user for preparing, deploying and querying a LLM model ... using Kserve and Caikit+TGIS runtime - [Tags] Sanity Tier1 ODS-2326 + [Tags] Tier1 ODS-2326 [Setup] Run Keywords Login To OCP Using API ${TEST_USER_3.USERNAME} ${TEST_USER_3.PASSWORD} AND ... Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=non-admin-test ${test_namespace}= Set Variable non-admin-test @@ -432,7 +432,7 @@ Verify Runtime Upgrade Does Not Affect Deployed Models ... must remain unchanged after the runtime upgrade. ... ATTENTION: this is an approximation of the runtime upgrade scenario, however ... the real case scenario will be defined once RHODS actually ships the Caikit runtime. - [Tags] Sanity Tier1 ODS-2404 + [Tags] Tier1 ODS-2404 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-up ${test_namespace}= Set Variable ${TEST_NS}-up ${flan_model_name}= Set Variable flan-t5-small-caikit @@ -468,7 +468,7 @@ Verify User Can Access Model Metrics From UWM ... PARTIALLY DONE: it is checking number of requests, number of successful requests ... and model pod cpu usage. Waiting for a complete list of expected metrics and ... derived metrics. 
- [Tags] Sanity Tier1 ODS-2401 + [Tags] Tier1 ODS-2401 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-metrics enable_metrics=${TRUE} ${test_namespace}= Set Variable singlemodel-metrics ${flan_model_name}= Set Variable flan-t5-small-caikit @@ -509,7 +509,7 @@ Verify User Can Access Model Metrics From UWM Verify User Can Query A Model Using HTTP Calls [Documentation] From RHOAI 2.5 HTTP is allowed and default querying protocol. ... This tests deploys the runtime enabling HTTP port and send queries to the model - [Tags] ODS-2501 Sanity Tier1 ProductBug + [Tags] ODS-2501 Tier1 ProductBug [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=kserve-http protocol=http ${test_namespace}= Set Variable kserve-http ${model_name}= Set Variable flan-t5-small-caikit From 838219e61dc0e6cad4fc6a254df3ea426c8b13cd Mon Sep 17 00:00:00 2001 From: bdattoma Date: Mon, 19 Feb 2024 17:17:20 +0100 Subject: [PATCH 03/16] fix ODS-2378 --- .../420__model_serving/LLMs/422__model_serving_llm _tgis.robot | 1 + 1 file changed, 1 insertion(+) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot index a8e76ad21..a2c92d226 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot @@ -101,6 +101,7 @@ Verify User Can Deploy Multiple Models In Different Namespaces Compile Inference Service YAML isvc_name=${model_two_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} ... 
namespace=singlemodel-multi2 Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_one_name} From 46552b18456df6158817b2b3d6df16bce6f3f2b7 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Mon, 19 Feb 2024 17:32:52 +0100 Subject: [PATCH 04/16] fix ODS-2373 --- .../LLMs/422__model_serving_llm _tgis.robot | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot index a2c92d226..37e0e1114 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot @@ -134,6 +134,7 @@ Verify Model Upgrade Using Canaray Rollout ... model_name=${model_name} ... namespace=${test_namespace} ... validate_response=${FALSE} + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Log To Console Applying Canary Tarffic for Model Upgrade ${model_name}= Set Variable bloom-560m-caikit Compile Deploy And Query LLM model isvc_name=${isvc_name} @@ -144,6 +145,7 @@ Verify Model Upgrade Using Canaray Rollout ... namespace=${test_namespace} ... validate_response=${FALSE} ... n_queries=${0} + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} Log To Console Remove Canary Tarffic For Model Upgrade @@ -160,7 +162,7 @@ Verify Model Upgrade Using Canaray Rollout Verify Model Pods Are Deleted When No Inference Service Is Present [Documentation] Checks if model pods gets successfully deleted after ... 
deleting the KServe InferenceService object - [Tags] Tier2 ODS-2373 AutomationBug + [Tags] Tier2 ODS-2373 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=no-infer-kserve ${flan_isvc_name}= Set Variable flan-t5-small-caikit ${model_name}= Set Variable flan-t5-small-caikit @@ -170,6 +172,7 @@ Verify Model Pods Are Deleted When No Inference Service Is Present ... model_storage_uri=${FLAN_STORAGE_URI} ... model_name=${model_name} ... namespace=no-infer-kserve + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Delete InfereceService isvc_name=${flan_isvc_name} namespace=no-infer-kserve ${rc} ${out}= Run And Return Rc And Output oc wait pod -l serving.kserve.io/inferenceservice=${flan_isvc_name} -n no-infer-kserve --for=delete --timeout=200s Should Be Equal As Integers ${rc} ${0} From 1b87609c8c845d4e2d22c807ffa9a6516aad9372 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 09:25:11 +0100 Subject: [PATCH 05/16] fix file name --- ..._serving_llm _tgis.robot => 422__model_serving_llm_tgis.robot} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/{422__model_serving_llm _tgis.robot => 422__model_serving_llm_tgis.robot} (100%) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot similarity index 100% rename from ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot rename to ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot From 7aa0889554f7d1d0e7a72c92fc8f875d569555e0 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 09:55:33 +0100 Subject: [PATCH 06/16] fix compile deploy and query keyword --- ods_ci/tests/Resources/CLI/ModelServing/llm.resource | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff 
--git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource index 875866dea..5f206d161 100644 --- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource +++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource @@ -290,13 +290,14 @@ Compile Deploy And Query LLM model [Documentation] Group together the test steps for preparing, deploying ... and querying a model [Arguments] ${model_storage_uri} ${model_name} ${isvc_name}=${model_name} - ... ${runtime}=caikit-tgis-runtime ${protocol}=grpc ${inference_type}=all-tokens + ... ${runtime}=caikit-tgis-runtime ${model_format}=caikit ${protocol}=grpc ${inference_type}=all-tokens ... ${canaryTrafficPercent}=${EMPTY} ${namespace}=${TEST_NS} ${sa_name}=${DEFAULT_BUCKET_SA_NAME} ... ${n_queries}=${1} ${query_idx}=${0} ${validate_response}=${TRUE} Compile Inference Service YAML isvc_name=${isvc_name} - ... sa_name=${sa_name} + ... sa_name=${sa_name} serving_runtime=${runtime} ... model_storage_uri=${model_storage_uri} ... canaryTrafficPercent=${canaryTrafficPercent} + ... model_format=${model_format} Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} ... 
namespace=${namespace} Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${isvc_name} From 7eff9490eef80f0d5e6b1af52fbe7e721db38f87 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 09:57:08 +0100 Subject: [PATCH 07/16] add clone proto files --- .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 37e0e1114..2bd1847a7 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -543,4 +543,5 @@ Suite Setup [Documentation] Skip If Component Is Not Enabled kserve RHOSi Setup - Load Expected Responses \ No newline at end of file + Load Expected Responses + Run git clone https://github.com/IBM/text-generation-inference/ \ No newline at end of file From c1e879a892af025cf8038abcf7e652b0cdf1c107 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 12:17:49 +0100 Subject: [PATCH 08/16] fix ODS-2377 --- ods_ci/tests/Resources/CLI/ModelServing/llm.resource | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource index 5f206d161..1ebb5ebc9 100644 --- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource +++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource @@ -273,12 +273,12 @@ Query Model Multiple Times Run Keyword And Continue On Failure Status Should Be 200 END Log ${res} - ${response_container_field}= Set Variable ${runtime_details}[response_fields_map][response] - IF "${response_container_field}" != "${EMPTY}" - # runtimes may support multiple queries per time. 
Here forcing to use only 1 for sake of simplicity. - ${res}= Set Variable ${res}[${response_container_field}][0] - END IF ${validate_response} == ${TRUE} + ${response_container_field}= Set Variable ${runtime_details}[response_fields_map][response] + IF "${response_container_field}" != "${EMPTY}" + # runtimes may support multiple queries per time. Here forcing to use only 1 for sake of simplicity. + ${res}= Set Variable ${res}[${response_container_field}][0] + END Run Keyword And Continue On Failure ... Model Response Should Match The Expectation model_response=${res} model_name=${model_name} ... runtime_details=${runtime_details} runtime=${runtime} From 473df0cd046a8c2e97967923bd52a43a977a4cc7 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 12:18:30 +0100 Subject: [PATCH 09/16] fix ODS-2371 --- .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 1 + 1 file changed, 1 insertion(+) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 2bd1847a7..769304939 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -65,6 +65,7 @@ Verify User Can Deploy Multiple Models In The Same Namespace Compile Inference Service YAML isvc_name=${model_two_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} ... 
namespace=${test_namespace} Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_one_name} From a4d908e9b5ea4aa9e76ad483acb19eab91a5611c Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 12:50:41 +0100 Subject: [PATCH 10/16] change project name --- .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 769304939..10aa77eba 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -18,7 +18,7 @@ ${FLAN_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_MODEL_S3_DIR} ${FLAN_GRAMMAR_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_GRAMMAR_MODEL_S3_DIR}/artifacts ${FLAN_LARGE_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_LARGE_MODEL_S3_DIR}/artifacts ${BLOOM_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${BLOOM_MODEL_S3_DIR}/artifacts -${TEST_NS}= tgis-standalone2 +${TEST_NS}= tgis-standalone ${TGIS_RUNTIME_NAME}= tgis-runtime From 51567b1261d809038beaf0abac0d52feb87acf87 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 17:09:35 +0100 Subject: [PATCH 11/16] fix ODS-2380 --- .../Resources/CLI/ModelServing/llm.resource | 18 +++++++++++------- .../LLMs/422__model_serving_llm.robot | 8 ++++---- .../LLMs/422__model_serving_llm_tgis.robot | 7 ++++--- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource index 1ebb5ebc9..8616a328c 100644 --- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource +++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource @@ -307,24 +307,28 @@ Compile Deploy And 
Query LLM model ... validate_response=${validate_response} protocol=${protocol} ... runtime=${runtime} inference_type=${inference_type} -Upgrade Caikit Runtime Image +Upgrade Runtime Image [Documentation] Replaces the image URL of the Caikit Runtim with the given ... ${new_image_url} - [Arguments] ${new_image_url} ${namespace} + [Arguments] ${new_image_url} ${namespace} ${container} ${runtime} + ${rc} ${container_idx}= Run And Return Rc And Output + ... oc get ServingRuntime/${runtime} -n ${namespace} -o json | jq '.spec.containers | map(.name == "${container}") | index(true)' # robocop: disable ${rc} ${out}= Run And Return Rc And Output - ... oc patch ServingRuntime caikit-tgis-runtime -n ${namespace} --type=json -p="[{'op': 'replace', 'path': '/spec/containers/0/image', 'value': '${new_image_url}'}]" # robocop: disable + ... oc patch ServingRuntime ${runtime} -n ${namespace} --type=json -p="[{'op': 'replace', 'path': '/spec/containers/${container_idx}/image', 'value': '${new_image_url}'}]" # robocop: disable Should Be Equal As Integers ${rc} ${0} Get Model Pods Creation Date And Image URL [Documentation] Fetches the creation date and the caikit runtime image URL. ... Useful in upgrade scenarios - [Arguments] ${model_name} ${namespace} + [Arguments] ${model_name} ${namespace} ${container} ${created_at}= Oc Get kind=Pod label_selector=serving.kserve.io/inferenceservice=${model_name} ... namespace=${namespace} fields=["metadata.creationTimestamp"] - ${rc} ${caikitsha}= Run And Return Rc And Output - ... oc get pod --selector serving.kserve.io/inferenceservice=${model_name} -n ${namespace} -ojson | jq '.items[].spec.containers[].image' | grep caikit-tgis # robocop: disable + ${rc} ${container_idx}= Run And Return Rc And Output + ... 
oc get pod --selector serving.kserve.io/inferenceservice=${model_name} -n ${namespace} -o json | jq '.items[].spec.containers | map(.name == "${container}") | index(true)' # robocop: disable + ${rc} ${image_url}= Run And Return Rc And Output + ... oc get pod --selector serving.kserve.io/inferenceservice=${model_name} -n ${namespace} -ojson | jq '.items[].spec.containers[${container_idx}].image' # robocop: disable Should Be Equal As Integers ${rc} ${0} - RETURN ${created_at} ${caikitsha} + RETURN ${created_at} ${image_url} User Can Fetch Number Of Requests Over Defined Time [Documentation] Fetches the `tgi_request_count` metric and checks that it reports the expected diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot index 01cb848d0..cac0a58c0 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot @@ -465,14 +465,14 @@ Verify Runtime Upgrade Does Not Affect Deployed Models ... inference_type=all-tokens n_times=1 ... namespace=${test_namespace} ${created_at} ${caikitsha}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} - ... namespace=${test_namespace} - Upgrade Caikit Runtime Image new_image_url=quay.io/opendatahub/caikit-tgis-serving:stable - ... namespace=${test_namespace} + ... namespace=${test_namespace} container=transformer-container + Upgrade Runtime Image new_image_url=quay.io/opendatahub/caikit-tgis-serving:stable + ... namespace=${test_namespace} container=transformer-container runtime=caikit-tgis-runtime Sleep 5s reason=Sleep, in case the runtime upgrade takes some time to start performing actions on the pods... Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} ... 
namespace=${test_namespace} exp_replicas=1 ${created_at_after} ${caikitsha_after}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} - ... namespace=${test_namespace} + ... namespace=${test_namespace} container=transformer-container Should Be Equal ${created_at} ${created_at_after} Should Be Equal As Strings ${caikitsha} ${caikitsha_after} [Teardown] Clean Up Test Project test_ns=${test_namespace} diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 10aa77eba..21a55bf84 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -454,14 +454,15 @@ Verify Runtime Upgrade Does Not Affect Deployed Models ... inference_type=all-tokens n_times=1 ... namespace=${test_namespace} ${created_at} ${caikitsha}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} - ... namespace=${test_namespace} - Upgrade Caikit Runtime Image new_image_url=quay.io/opendatahub/caikit-tgis-serving:stable + ... namespace=${test_namespace} container=kserve-container + Upgrade Runtime Image container=kserve-container runtime=${TGIS_RUNTIME_NAME} + ... new_image_url=quay.io/modh/text-generation-inference:fast ... namespace=${test_namespace} Sleep 5s reason=Sleep, in case the runtime upgrade takes some time to start performing actions on the pods... Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} ... namespace=${test_namespace} exp_replicas=1 ${created_at_after} ${caikitsha_after}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} - ... namespace=${test_namespace} + ... 
namespace=${test_namespace} container=kserve-container Should Be Equal ${created_at} ${created_at_after} Should Be Equal As Strings ${caikitsha} ${caikitsha_after} [Teardown] Clean Up Test Project test_ns=${test_namespace} From 1477b974e09a4d80dd43c6a2af760258d1139922 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 17:48:08 +0100 Subject: [PATCH 12/16] fix ODS-2372 --- ods_ci/tests/Resources/CLI/ModelServing/llm.resource | 3 ++- .../420__model_serving/LLMs/422__model_serving_llm.robot | 2 ++ .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource index 8616a328c..601bc60e0 100644 --- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource +++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource @@ -457,6 +457,7 @@ Traffic Should Be Redirected Based On Canary Percentage ... matches the expected probability ${exp_percentage}. ... It applies an arbitrary toleration margin of ${toleration} [Arguments] ${exp_percentage} ${isvc_name} ${model_name} ${namespace} + ... ${model_format} ${runtime} ${total}= Set Variable ${20} ${hits}= Set Variable ${0} ${toleration}= Set Variable ${20} @@ -464,7 +465,7 @@ Traffic Should Be Redirected Based On Canary Percentage Log ${counter} ${status}= Run Keyword And Return Status ... Query Model Multiple Times isvc_name=${isvc_name} model_name=${model_name} n_times=1 - ... namespace=${namespace} + ... 
namespace=${namespace} model_format=${model_format} runtime=${runtime} IF ${status} == ${TRUE} ${hits}= Evaluate ${hits}+1 END diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot index cac0a58c0..633a37858 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot @@ -170,6 +170,7 @@ Verify Model Upgrade Using Canaray Rollout ... n_queries=${0} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + ... model_format=caikit runtime=caikit-tgi-runtime Log To Console Remove Canary Tarffic For Model Upgrade Compile Deploy And Query LLM model isvc_name=${isvc_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} @@ -178,6 +179,7 @@ Verify Model Upgrade Using Canaray Rollout ... namespace=${test_namespace} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + ... model_format=caikit runtime=caikit-tgi-runtime [Teardown] Clean Up Test Project test_ns=${test_namespace} ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE} diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 21a55bf84..185259568 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -149,14 +149,17 @@ Verify Model Upgrade Using Canaray Rollout ... 
model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Log To Console Remove Canary Tarffic For Model Upgrade Compile Deploy And Query LLM model isvc_name=${isvc_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} ... model_name=${model_name} ... model_storage_uri=${BLOOM_STORAGE_URI} ... namespace=${test_namespace} + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} [Teardown] Clean Up Test Project test_ns=${test_namespace} ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE} From d57e21fd6fe27c500057c095099c5b6fb3311568 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 17:48:36 +0100 Subject: [PATCH 13/16] remove auto bug label from ODS-2372 --- .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 1 - 1 file changed, 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 185259568..8e8a79853 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -122,7 +122,6 @@ Verify User Can Deploy Multiple Models In Different Namespaces Verify Model Upgrade Using Canaray Rollout [Documentation] Checks if user can apply Canary Rollout as deployment strategy [Tags] Tier1 ODS-2372 - ... 
AutomationBug [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=canary-model-upgrade ${test_namespace}= Set Variable canary-model-upgrade ${isvc_name}= Set Variable canary-caikit From 7778cf425aa004a59d7ebe2b449f591cd14d85ad Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 21 Feb 2024 10:11:44 +0100 Subject: [PATCH 14/16] remove wrong arg from canary kw --- ods_ci/tests/Resources/CLI/ModelServing/llm.resource | 4 ++-- .../420__model_serving/LLMs/422__model_serving_llm.robot | 4 ++-- .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource index 601bc60e0..fab884ac6 100644 --- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource +++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource @@ -457,7 +457,7 @@ Traffic Should Be Redirected Based On Canary Percentage ... matches the expected probability ${exp_percentage}. ... It applies an arbitrary toleration margin of ${toleration} [Arguments] ${exp_percentage} ${isvc_name} ${model_name} ${namespace} - ... ${model_format} ${runtime} + ... ${runtime} ${total}= Set Variable ${20} ${hits}= Set Variable ${0} ${toleration}= Set Variable ${20} @@ -465,7 +465,7 @@ Traffic Should Be Redirected Based On Canary Percentage Log ${counter} ${status}= Run Keyword And Return Status ... Query Model Multiple Times isvc_name=${isvc_name} model_name=${model_name} n_times=1 - ... namespace=${namespace} model_format=${model_format} runtime=${runtime} + ... 
namespace=${namespace} runtime=${runtime} IF ${status} == ${TRUE} ${hits}= Evaluate ${hits}+1 END diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot index 633a37858..9dcb4df82 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot @@ -170,7 +170,7 @@ Verify Model Upgrade Using Canaray Rollout ... n_queries=${0} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... model_format=caikit runtime=caikit-tgi-runtime + ... runtime=caikit-tgi-runtime Log To Console Remove Canary Tarffic For Model Upgrade Compile Deploy And Query LLM model isvc_name=${isvc_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} @@ -179,7 +179,7 @@ Verify Model Upgrade Using Canaray Rollout ... namespace=${test_namespace} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... model_format=caikit runtime=caikit-tgi-runtime + ... runtime=caikit-tgi-runtime [Teardown] Clean Up Test Project test_ns=${test_namespace} ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE} diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 8e8a79853..71873250b 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -148,7 +148,7 @@ Verify Model Upgrade Using Canaray Rollout ... 
model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} + ... runtime=${TGIS_RUNTIME_NAME} Log To Console Remove Canary Tarffic For Model Upgrade Compile Deploy And Query LLM model isvc_name=${isvc_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} @@ -158,7 +158,7 @@ Verify Model Upgrade Using Canaray Rollout ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} + ... runtime=${TGIS_RUNTIME_NAME} [Teardown] Clean Up Test Project test_ns=${test_namespace} ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE} From 6123d9da2a56fa9ab49c81b5c1c653621bf2e585 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 21 Feb 2024 10:19:26 +0100 Subject: [PATCH 15/16] change metrics check for tgis ODS-2401 --- .../LLMs/422__model_serving_llm_tgis.robot | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 71873250b..08f353070 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -20,6 +20,7 @@ ${FLAN_LARGE_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_LARGE_MODEL_S3_DIR ${BLOOM_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${BLOOM_MODEL_S3_DIR}/artifacts ${TEST_NS}= tgis-standalone ${TGIS_RUNTIME_NAME}= tgis-runtime +@{SEARCH_METRICS}= tgi_ istio_ *** Test Cases *** @@ -490,9 +491,11 @@ 
Verify User Can Access Model Metrics From UWM Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} ... namespace=${test_namespace} Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} - ... namespace=${test_namespace} + ... namespace=${test_namespace} Wait Until Keyword Succeeds 30 times 4s - ... TGI Caikit And Istio Metrics Should Exist thanos_url=${thanos_url} thanos_token=${token} + ... Metrics Should Exist In UserWorkloadMonitoring + ... thanos_url=${thanos_url} thanos_token=${token} + ... search_metrics=${SEARCH_METRICS} Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} ... inference_type=all-tokens n_times=3 ... namespace=${test_namespace} From 066c8ba3bb6d857ae4d5bef49725a99c67847968 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 21 Feb 2024 12:33:16 +0100 Subject: [PATCH 16/16] fix typo --- .../420__model_serving/LLMs/422__model_serving_llm.robot | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot index 9dcb4df82..9f1122d48 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot @@ -170,7 +170,7 @@ Verify Model Upgrade Using Canaray Rollout ... n_queries=${0} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... runtime=caikit-tgi-runtime + ... runtime=caikit-tgis-runtime Log To Console Remove Canary Tarffic For Model Upgrade Compile Deploy And Query LLM model isvc_name=${isvc_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} @@ -179,7 +179,7 @@ Verify Model Upgrade Using Canaray Rollout ... 
namespace=${test_namespace} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... runtime=caikit-tgi-runtime + ... runtime=caikit-tgis-runtime [Teardown] Clean Up Test Project test_ns=${test_namespace} ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE}