From 654d1c3fe334019d4eb18f010be6dc3a1f86eab8 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Mon, 22 Jan 2024 18:02:27 +0100 Subject: [PATCH 01/15] add UI test for TGIS --- ..._model_serving_llm_other_runtimes_UI.robot | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot new file mode 100644 index 000000000..c4675e516 --- /dev/null +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot @@ -0,0 +1,74 @@ +*** Settings *** +Documentation Collection of UI tests to validate the model serving stack for Large Language Models (LLM). +... These tests leverage on Caikit+TGIS combined Serving Runtime +Resource ../../../../Resources/Page/ODH/ODHDashboard/ODHModelServing.resource +Resource ../../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource +Resource ../../../../Resources/CLI/ModelServing/llm.resource +Suite Setup Non-Admin Setup Kserve UI Test +Suite Teardown Non-Admin Teardown Kserve UI Test +Test Tags KServe + + +*** Variables *** +${LLM_RESOURCES_DIRPATH}= ods_ci/tests/Resources/Files/llm +${TEST_NS}= runtimes-ui +${EXP_RESPONSES_FILEPATH}= ${LLM_RESOURCES_DIRPATH}/model_expected_responses.json +${FLAN_MODEL_S3_DIR}= flan-t5-small/flan-t5-small-hf +${FLAN_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_MODEL_S3_DIR}/ +${TGIS_RUNTIME_NAME}= tgis-runtime + + +*** Test Cases *** +Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable + [Documentation] Basic tests leveraging on a non-admin user for preparing, deploying and querying a LLM model + ... 
using Kserve and Caikit+TGIS runtime + [Tags] Sanity Tier1 ODS-XYZ + ${test_namespace}= Set Variable ${TEST_NS} + ${model_name}= Set Variable flan-t5-small-hf + Deploy Kserve Model Via UI model_name=${model_name} serving_runtime=TGIS Standalone ServingRuntime for KServe (gRPC) + ... data_connection=kserve-connection model_framework=pytorch path=${FLAN_MODEL_S3_DIR} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${model_name} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} protocol=grpc + Query Model Multiple Times model_name=${model_name} + ... inference_type=streaming n_times=1 + ... namespace=${test_namespace} protocol=http validate_response=$FALSE + Delete Model Via UI ${model_name} + +##Verify Model Can Be Served And Query On A GPU Node Using The UI # robocop: disable +## [Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node +## ... using Kserve and Caikit+TGIS runtime +## [Tags] Sanity Tier1 ODS-XYZ Resources-GPU + +## Verify User Can Access Model Metrics From UWM Using The UI # robocop: disable +## [Documentation] Verifies that model metrics are available for users in the +## ... OpenShift monitoring system (UserWorkloadMonitoring) +## ... PARTIALLY DONE: it is checking number of requests, number of successful requests +## ... and model pod cpu usage. Waiting for a complete list of expected metrics and +## ... derived metrics. +## [Tags] Sanity Tier1 ODS-XYZ + + + +*** Keywords *** +Non-Admin Setup Kserve UI Test + [Documentation] Installing And Configuring dependency operators: Service Mesh and Serverless. + ... This is likely going to change in the future and it will include a way to skip installation. + ... Caikit runtime will be shipped Out-of-the-box and will be removed from here. 
+ [Arguments] ${user}=${TEST_USER_3.USERNAME} ${pw}=${TEST_USER_3.PASSWORD} ${auth}=${TEST_USER_3.AUTH_TYPE} + Set Library Search Order SeleniumLibrary + Skip If Component Is Not Enabled kserve + # RHOSi Setup + Load Expected Responses + Launch Dashboard ${user} ${pw} ${auth} ${ODH_DASHBOARD_URL} ${BROWSER.NAME} ${BROWSER.OPTIONS} + Set Up Project namespace=${TEST_NS} single_prj=${FALSE} + Fetch CA Certificate If RHODS Is Self-Managed + +Non-Admin Teardown Kserve UI Test + Delete Data Science Project project_title=${TEST_NS} + # if UI deletion fails it will try deleting from CLI + Delete Data Science Projects From CLI ocp_projects=${PROJECTS_TO_DELETE} + SeleniumLibrary.Close All Browsers + # RHOSi Teardown \ No newline at end of file From 865f170163125b6d178ceb0d1ddd87995d6b3e7e Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 23 Jan 2024 12:42:45 +0100 Subject: [PATCH 02/15] add metrics check with TGIS standalone --- ..._model_serving_llm_other_runtimes_UI.robot | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot index c4675e516..3cc4a06c8 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot @@ -16,12 +16,13 @@ ${EXP_RESPONSES_FILEPATH}= ${LLM_RESOURCES_DIRPATH}/model_expected_responses. 
${FLAN_MODEL_S3_DIR}= flan-t5-small/flan-t5-small-hf ${FLAN_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_MODEL_S3_DIR}/ ${TGIS_RUNTIME_NAME}= tgis-runtime +@{SEARCH_METRICS}= tgi_ istio_ *** Test Cases *** Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable [Documentation] Basic tests leveraging on a non-admin user for preparing, deploying and querying a LLM model - ... using Kserve and Caikit+TGIS runtime + ... using Kserve and TGIS Standalone runtime. [Tags] Sanity Tier1 ODS-XYZ ${test_namespace}= Set Variable ${TEST_NS} ${model_name}= Set Variable flan-t5-small-hf @@ -29,12 +30,18 @@ Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable ... data_connection=kserve-connection model_framework=pytorch path=${FLAN_MODEL_S3_DIR} Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} ... namespace=${test_namespace} - Query Model Multiple Times model_name=${model_name} + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} ... inference_type=all-tokens n_times=1 ... namespace=${test_namespace} protocol=grpc - Query Model Multiple Times model_name=${model_name} + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} ... inference_type=streaming n_times=1 - ... namespace=${test_namespace} protocol=http validate_response=$FALSE + ... namespace=${test_namespace} protocol=grpc validate_response=${FALSE} + Wait Until Keyword Succeeds 30 times 4s + ... Metrics Should Exist In UserWorkloadMonitoring thanos_url=${THANOS_URL} thanos_token=${THANOS_TOKEN} + ... search_metrics=${SEARCH_METRICS} + Wait Until Keyword Succeeds 50 times 5s + ... User Can Fetch Number Of Requests Over Defined Time thanos_url=${THANOS_URL} thanos_token=${THANOS_TOKEN} + ... 
model_name=${model_name} query_kind=single namespace=${test_namespace} period=5m exp_value=1 Delete Model Via UI ${model_name} ##Verify Model Can Be Served And Query On A GPU Node Using The UI # robocop: disable @@ -42,15 +49,6 @@ Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable ## ... using Kserve and Caikit+TGIS runtime ## [Tags] Sanity Tier1 ODS-XYZ Resources-GPU -## Verify User Can Access Model Metrics From UWM Using The UI # robocop: disable -## [Documentation] Verifies that model metrics are available for users in the -## ... OpenShift monitoring system (UserWorkloadMonitoring) -## ... PARTIALLY DONE: it is checking number of requests, number of successful requests -## ... and model pod cpu usage. Waiting for a complete list of expected metrics and -## ... derived metrics. -## [Tags] Sanity Tier1 ODS-XYZ - - *** Keywords *** Non-Admin Setup Kserve UI Test @@ -64,7 +62,13 @@ Non-Admin Setup Kserve UI Test Load Expected Responses Launch Dashboard ${user} ${pw} ${auth} ${ODH_DASHBOARD_URL} ${BROWSER.NAME} ${BROWSER.OPTIONS} Set Up Project namespace=${TEST_NS} single_prj=${FALSE} + ${PROJECTS_TO_DELETE}= Create List ${TEST_NS} + Set Suite Variable ${PROJECTS_TO_DELETE} Fetch CA Certificate If RHODS Is Self-Managed + ${thanos_url}= Get OpenShift Thanos URL + ${token}= Generate Thanos Token + Set Suite Variable ${THANOS_URL} ${thanos_url} + Set Suite Variable ${THANOS_TOKEN} ${token} Non-Admin Teardown Kserve UI Test Delete Data Science Project project_title=${TEST_NS} From 70b0d7f8f8005e6f569ad2831485e11c08db65a7 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 23 Jan 2024 13:26:31 +0100 Subject: [PATCH 03/15] add git clone for proto files --- .../LLMs/422__model_serving_llm_other_runtimes_UI.robot | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot 
b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot index 3cc4a06c8..219cb40eb 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot @@ -24,6 +24,7 @@ Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable [Documentation] Basic tests leveraging on a non-admin user for preparing, deploying and querying a LLM model ... using Kserve and TGIS Standalone runtime. [Tags] Sanity Tier1 ODS-XYZ + [Setup] Run git clone https://github.com/IBM/text-generation-inference/ ${test_namespace}= Set Variable ${TEST_NS} ${model_name}= Set Variable flan-t5-small-hf Deploy Kserve Model Via UI model_name=${model_name} serving_runtime=TGIS Standalone ServingRuntime for KServe (gRPC) @@ -61,7 +62,7 @@ Non-Admin Setup Kserve UI Test # RHOSi Setup Load Expected Responses Launch Dashboard ${user} ${pw} ${auth} ${ODH_DASHBOARD_URL} ${BROWSER.NAME} ${BROWSER.OPTIONS} - Set Up Project namespace=${TEST_NS} single_prj=${FALSE} + Set Up Project namespace=${TEST_NS} single_prj=${FALSE} enable_metrics=${TRUE} ${PROJECTS_TO_DELETE}= Create List ${TEST_NS} Set Suite Variable ${PROJECTS_TO_DELETE} Fetch CA Certificate If RHODS Is Self-Managed From 9af45e32234d576c5b0b6c88d9411616f0fc9b13 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 23 Jan 2024 13:26:54 +0100 Subject: [PATCH 04/15] add test id --- .../LLMs/422__model_serving_llm_other_runtimes_UI.robot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot index 219cb40eb..e97f96649 100644 --- 
a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot @@ -23,7 +23,7 @@ ${TGIS_RUNTIME_NAME}= tgis-runtime Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable [Documentation] Basic tests leveraging on a non-admin user for preparing, deploying and querying a LLM model ... using Kserve and TGIS Standalone runtime. - [Tags] Sanity Tier1 ODS-XYZ + [Tags] Sanity Tier1 ODS-2611 [Setup] Run git clone https://github.com/IBM/text-generation-inference/ ${test_namespace}= Set Variable ${TEST_NS} ${model_name}= Set Variable flan-t5-small-hf From 95afcefb0f586c7696f3bcb3de7ffd68bbbc0bcf Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 23 Jan 2024 18:03:40 +0100 Subject: [PATCH 05/15] add generic keyword for checking metrics existence UWM --- ods_ci/tests/Resources/OCP.resource | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ods_ci/tests/Resources/OCP.resource b/ods_ci/tests/Resources/OCP.resource index 519c5d6db..5ab9f97c5 100644 --- a/ods_ci/tests/Resources/OCP.resource +++ b/ods_ci/tests/Resources/OCP.resource @@ -167,4 +167,19 @@ Wait Until CRD Exists [Documentation] Repeatedly searches for the expected CRD. Stops when it finds it [Arguments] ${crd_fullname} Wait Until Keyword Succeeds 15 times 5s - ... Oc Get kind=CustomResourceDefinition name=${crd_fullname} \ No newline at end of file + ... Oc Get kind=CustomResourceDefinition name=${crd_fullname} + +Metrics Should Exist In UserWorkloadMonitoring + [Documentation] Checks that the given ${metrics_searches} exists. It also accepts partial names. + ... 
Returns the complete list of metrics found with the given search texts + [Arguments] ${thanos_url} ${thanos_token} ${search_metrics} + ${metrics}= Create List + FOR ${index} ${metric_search_text} IN ENUMERATE @{search_metrics} + Log ${index}: ${metric_search_text} + ${metrics_names}= Get Thanos Metrics List thanos_url=${thanos_url} thanos_token=${thanos_token} + ... search_text=${metric_search_text} + Should Not Be Empty ${metrics_names} + ${metrics_names}= Split To Lines ${metrics_names} + ${metrics}= Append To List ${metrics} @{metrics_names} + END + RETURN ${metrics} From f59c2f3ccf6e333877083319222e887ca68606d2 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 23 Jan 2024 18:04:17 +0100 Subject: [PATCH 06/15] increase project deletion timeout --- .../ODH/ODHDashboard/ODHDataScienceProject/Projects.resource | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource index 5508ed73e..b7d9dc25e 100644 --- a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource +++ b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource @@ -63,7 +63,7 @@ Delete Data Science Project Wait Until Data Science Project Is Deleted [Documentation] Checks if when a DS Project is deleted its Openshift namespace gets deleted too [Arguments] ${project_title} - Wait Until Keyword Succeeds 15 times 2s + Wait Until Keyword Succeeds 30 times 2s ... 
Project Should Not Exist In Openshift project_title=${project_title} Project Should Not Exist In Openshift From 93ea6d2b8e57f18c36e846af0885d13dcecc590f Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 23 Jan 2024 18:04:55 +0100 Subject: [PATCH 07/15] add tgis ui test on GPU --- ..._model_serving_llm_other_runtimes_UI.robot | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot index e97f96649..518705dbc 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot @@ -45,10 +45,31 @@ Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable ... model_name=${model_name} query_kind=single namespace=${test_namespace} period=5m exp_value=1 Delete Model Via UI ${model_name} -##Verify Model Can Be Served And Query On A GPU Node Using The UI # robocop: disable -## [Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node -## ... using Kserve and Caikit+TGIS runtime -## [Tags] Sanity Tier1 ODS-XYZ Resources-GPU +Verify Model Can Be Served And Query On A GPU Node Using The UI # robocop: disable + [Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node + ... 
using Kserve and Caikit+TGIS runtime + [Tags] Sanity Tier1 ODS-XYZ Resources-GPU + [Setup] Run git clone https://github.com/IBM/text-generation-inference/ + ${test_namespace}= Set Variable ${TEST_NS} + ${isvc__name}= Set Variable flan-t5-small-hf-gpu + ${model_name}= Set Variable flan-t5-small-hf + ${requests}= Create Dictionary nvidia.com/gpu=1 + ${limits}= Create Dictionary nvidia.com/gpu=1 + Deploy Kserve Model Via UI model_name=${isvc__name} serving_runtime=TGIS Standalone ServingRuntime for KServe (gRPC) + ... data_connection=kserve-connection model_framework=pytorch path=${FLAN_MODEL_S3_DIR} + ... no_gpus=${1} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${isvc__name} + ... namespace=${test_namespace} + Container Hardware Resources Should Match Expected container_name=kserve-container + ... pod_label_selector=serving.kserve.io/inferenceservice=${isvc__name} + ... namespace=${test_namespace} exp_requests=${requests} exp_limits=${limits} + Query Model Multiple Times model_name=${model_name} isvc_name=${isvc__name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} protocol=grpc + Query Model Multiple Times model_name=${model_name} isvc_name=${isvc__name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=streaming n_times=1 + ... 
namespace=${test_namespace} protocol=grpc validate_response=${FALSE} + Delete Model Via UI ${isvc__name} *** Keywords *** @@ -59,7 +80,7 @@ Non-Admin Setup Kserve UI Test [Arguments] ${user}=${TEST_USER_3.USERNAME} ${pw}=${TEST_USER_3.PASSWORD} ${auth}=${TEST_USER_3.AUTH_TYPE} Set Library Search Order SeleniumLibrary Skip If Component Is Not Enabled kserve - # RHOSi Setup + RHOSi Setup Load Expected Responses Launch Dashboard ${user} ${pw} ${auth} ${ODH_DASHBOARD_URL} ${BROWSER.NAME} ${BROWSER.OPTIONS} Set Up Project namespace=${TEST_NS} single_prj=${FALSE} enable_metrics=${TRUE} @@ -76,4 +97,4 @@ Non-Admin Teardown Kserve UI Test # if UI deletion fails it will try deleting from CLI Delete Data Science Projects From CLI ocp_projects=${PROJECTS_TO_DELETE} SeleniumLibrary.Close All Browsers - # RHOSi Teardown \ No newline at end of file + RHOSi Teardown \ No newline at end of file From c1f5fb5270e1b84482627ce4ff854e53a9568b8e Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 23 Jan 2024 18:08:49 +0100 Subject: [PATCH 08/15] add common kw to set thanos suite variables --- .../Resources/Page/ODH/Monitoring/Monitoring.resource | 7 +++++++ ods_ci/tests/Resources/RHOSi.resource | 1 + .../LLMs/422__model_serving_llm_other_runtimes_UI.robot | 5 +---- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource b/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource index e456bea60..fded9627e 100644 --- a/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource +++ b/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource @@ -160,3 +160,10 @@ Get Thanos Metrics List END ${rc} ${out}= Run And Return Rc And Output ${cmd} | tr -d '"' RETURN ${out} + + +Set Thanos Credentials Variables + ${thanos_url}= Get OpenShift Thanos URL + ${token}= Generate Thanos Token + Set Suite Variable ${THANOS_URL} ${thanos_url} + Set Suite Variable ${THANOS_TOKEN} ${token} \ No newline at end of file diff --git 
a/ods_ci/tests/Resources/RHOSi.resource b/ods_ci/tests/Resources/RHOSi.resource index 9e9774135..c4770d84c 100644 --- a/ods_ci/tests/Resources/RHOSi.resource +++ b/ods_ci/tests/Resources/RHOSi.resource @@ -43,6 +43,7 @@ Resource Common.robot ... Create Secret For S3-Like Buckets ... Login To OCP Using API ... Generate Thanos Token +... Set Thanos Credentials Variables *** Keywords *** diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot index 518705dbc..649e52c31 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot @@ -87,10 +87,7 @@ Non-Admin Setup Kserve UI Test ${PROJECTS_TO_DELETE}= Create List ${TEST_NS} Set Suite Variable ${PROJECTS_TO_DELETE} Fetch CA Certificate If RHODS Is Self-Managed - ${thanos_url}= Get OpenShift Thanos URL - ${token}= Generate Thanos Token - Set Suite Variable ${THANOS_URL} ${thanos_url} - Set Suite Variable ${THANOS_TOKEN} ${token} + Set Thanos Credentials Variables Non-Admin Teardown Kserve UI Test Delete Data Science Project project_title=${TEST_NS} From ba05db77666b9ca2f8d2e6f005d6de970d2ad3e1 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 23 Jan 2024 18:19:20 +0100 Subject: [PATCH 09/15] update docs --- .../LLMs/422__model_serving_llm_other_runtimes_UI.robot | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot index 649e52c31..b5952a548 100644 --- 
a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot @@ -1,6 +1,5 @@ *** Settings *** -Documentation Collection of UI tests to validate the model serving stack for Large Language Models (LLM). -... These tests leverage on Caikit+TGIS combined Serving Runtime +Documentation Collection of UI tests to validate the model serving stack for Large Language Models (LLM) Resource ../../../../Resources/Page/ODH/ODHDashboard/ODHModelServing.resource Resource ../../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource Resource ../../../../Resources/CLI/ModelServing/llm.resource @@ -22,7 +21,7 @@ ${TGIS_RUNTIME_NAME}= tgis-runtime *** Test Cases *** Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable [Documentation] Basic tests leveraging on a non-admin user for preparing, deploying and querying a LLM model - ... using Kserve and TGIS Standalone runtime. + ... using Single-model platform and TGIS Standalone runtime. [Tags] Sanity Tier1 ODS-2611 [Setup] Run git clone https://github.com/IBM/text-generation-inference/ ${test_namespace}= Set Variable ${TEST_NS} @@ -47,7 +46,7 @@ Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable Verify Model Can Be Served And Query On A GPU Node Using The UI # robocop: disable [Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node - ... using Kserve and Caikit+TGIS runtime + ... using Single-model platform and TGIS Standalone runtime. 
[Tags] Sanity Tier1 ODS-XYZ Resources-GPU [Setup] Run git clone https://github.com/IBM/text-generation-inference/ ${test_namespace}= Set Variable ${TEST_NS} @@ -94,4 +93,4 @@ Non-Admin Teardown Kserve UI Test # if UI deletion fails it will try deleting from CLI Delete Data Science Projects From CLI ocp_projects=${PROJECTS_TO_DELETE} SeleniumLibrary.Close All Browsers - RHOSi Teardown \ No newline at end of file + RHOSi Teardown From 81137cca8b4ea2d8532e794e85815373b67cdac7 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 24 Jan 2024 15:08:57 +0100 Subject: [PATCH 10/15] add polarion id --- .../LLMs/422__model_serving_llm_other_runtimes_UI.robot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot index b5952a548..e1fa4e2e6 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot @@ -47,7 +47,7 @@ Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable Verify Model Can Be Served And Query On A GPU Node Using The UI # robocop: disable [Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node ... using Single-model platform and TGIS Standalone runtime. 
- [Tags] Sanity Tier1 ODS-XYZ Resources-GPU + [Tags] Sanity Tier1 ODS-2612 Resources-GPU [Setup] Run git clone https://github.com/IBM/text-generation-inference/ ${test_namespace}= Set Variable ${TEST_NS} ${isvc__name}= Set Variable flan-t5-small-hf-gpu From 272c4164d93ac6de05010d57a4771dbf2f63a9ee Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 24 Jan 2024 15:24:31 +0100 Subject: [PATCH 11/15] fix metrics keyword --- ods_ci/tests/Resources/OCP.resource | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ods_ci/tests/Resources/OCP.resource b/ods_ci/tests/Resources/OCP.resource index 5ab9f97c5..a8780a2e6 100644 --- a/ods_ci/tests/Resources/OCP.resource +++ b/ods_ci/tests/Resources/OCP.resource @@ -180,6 +180,6 @@ Metrics Should Exist In UserWorkloadMonitoring ... search_text=${metric_search_text} Should Not Be Empty ${metrics_names} ${metrics_names}= Split To Lines ${metrics_names} - ${metrics}= Append To List ${metrics} @{metrics_names} + Append To List ${metrics} @{metrics_names} END RETURN ${metrics} From d15433ea83b0929c99ca1ab105c34bc97f833fee Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 24 Jan 2024 15:24:45 +0100 Subject: [PATCH 12/15] increase project deletion timeout --- .../ODH/ODHDashboard/ODHDataScienceProject/Projects.resource | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource index 0e158f41f..18d4ee460 100644 --- a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource +++ b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource @@ -63,7 +63,7 @@ Delete Data Science Project Wait Until Data Science Project Is Deleted [Documentation] Checks if when a DS Project is deleted its Openshift namespace gets deleted too [Arguments] ${project_title} - Wait Until Keyword Succeeds 30 times 2s + Wait 
Until Keyword Succeeds 35 times 3s ... Project Should Not Exist In Openshift project_title=${project_title} Project Should Not Exist In Openshift From a1317e0db52d99eb264776533fbae208f832f769 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 24 Jan 2024 15:41:11 +0100 Subject: [PATCH 13/15] increase project deletion timeout again --- .../ODH/ODHDashboard/ODHDataScienceProject/Projects.resource | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource index 18d4ee460..1957e5ea9 100644 --- a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource +++ b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource @@ -63,7 +63,7 @@ Delete Data Science Project Wait Until Data Science Project Is Deleted [Documentation] Checks if when a DS Project is deleted its Openshift namespace gets deleted too [Arguments] ${project_title} - Wait Until Keyword Succeeds 35 times 3s + Wait Until Keyword Succeeds 40 times 5s ... Project Should Not Exist In Openshift project_title=${project_title} Project Should Not Exist In Openshift From 41408b29bdd5bef85d5d2304a235268d418032e9 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 24 Jan 2024 15:58:05 +0100 Subject: [PATCH 14/15] mv metrics keyword to monitoring resource file --- ods_ci/tests/Resources/OCP.resource | 15 --------------- .../Page/ODH/Monitoring/Monitoring.resource | 18 ++++++++++++++++-- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/ods_ci/tests/Resources/OCP.resource b/ods_ci/tests/Resources/OCP.resource index a8780a2e6..40b5a8750 100644 --- a/ods_ci/tests/Resources/OCP.resource +++ b/ods_ci/tests/Resources/OCP.resource @@ -168,18 +168,3 @@ Wait Until CRD Exists [Arguments] ${crd_fullname} Wait Until Keyword Succeeds 15 times 5s ... 
Oc Get kind=CustomResourceDefinition name=${crd_fullname} - -Metrics Should Exist In UserWorkloadMonitoring - [Documentation] Checks that the given ${metrics_searches} exists. It also accepts partial names. - ... Returns the complete list of metrics found with the given search texts - [Arguments] ${thanos_url} ${thanos_token} ${search_metrics} - ${metrics}= Create List - FOR ${index} ${metric_search_text} IN ENUMERATE @{search_metrics} - Log ${index}: ${metric_search_text} - ${metrics_names}= Get Thanos Metrics List thanos_url=${thanos_url} thanos_token=${thanos_token} - ... search_text=${metric_search_text} - Should Not Be Empty ${metrics_names} - ${metrics_names}= Split To Lines ${metrics_names} - Append To List ${metrics} @{metrics_names} - END - RETURN ${metrics} diff --git a/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource b/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource index fded9627e..5507240da 100644 --- a/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource +++ b/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource @@ -161,9 +161,23 @@ Get Thanos Metrics List ${rc} ${out}= Run And Return Rc And Output ${cmd} | tr -d '"' RETURN ${out} - Set Thanos Credentials Variables ${thanos_url}= Get OpenShift Thanos URL ${token}= Generate Thanos Token Set Suite Variable ${THANOS_URL} ${thanos_url} - Set Suite Variable ${THANOS_TOKEN} ${token} \ No newline at end of file + Set Suite Variable ${THANOS_TOKEN} ${token} + +Metrics Should Exist In UserWorkloadMonitoring + [Documentation] Checks that the given ${search_metrics} exist. It also accepts partial names. + ... 
Returns the complete list of metrics found with the given search texts + [Arguments] ${thanos_url} ${thanos_token} ${search_metrics} + ${metrics}= Create List + FOR ${index} ${metric_search_text} IN ENUMERATE @{search_metrics} + Log ${index}: ${metric_search_text} + ${metrics_names}= Get Thanos Metrics List thanos_url=${thanos_url} thanos_token=${thanos_token} + ... search_text=${metric_search_text} + Should Not Be Empty ${metrics_names} + ${metrics_names}= Split To Lines ${metrics_names} + Append To List ${metrics} @{metrics_names} + END + RETURN ${metrics} From 43614bb76dfef8f801d35fe48ee681711ccbf0e1 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 24 Jan 2024 16:26:44 +0100 Subject: [PATCH 15/15] fix robocop alerts --- .../LLMs/422__model_serving_llm_other_runtimes_UI.robot | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot index e1fa4e2e6..165ddd752 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_other_runtimes_UI.robot @@ -26,7 +26,8 @@ Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable [Setup] Run git clone https://github.com/IBM/text-generation-inference/ ${test_namespace}= Set Variable ${TEST_NS} ${model_name}= Set Variable flan-t5-small-hf - Deploy Kserve Model Via UI model_name=${model_name} serving_runtime=TGIS Standalone ServingRuntime for KServe (gRPC) + Deploy Kserve Model Via UI model_name=${model_name} + ... serving_runtime=TGIS Standalone ServingRuntime for KServe (gRPC) ... 
data_connection=kserve-connection model_framework=pytorch path=${FLAN_MODEL_S3_DIR} Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} ... namespace=${test_namespace} @@ -37,10 +38,12 @@ Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable ... inference_type=streaming n_times=1 ... namespace=${test_namespace} protocol=grpc validate_response=${FALSE} Wait Until Keyword Succeeds 30 times 4s - ... Metrics Should Exist In UserWorkloadMonitoring thanos_url=${THANOS_URL} thanos_token=${THANOS_TOKEN} + ... Metrics Should Exist In UserWorkloadMonitoring + ... thanos_url=${THANOS_URL} thanos_token=${THANOS_TOKEN} ... search_metrics=${SEARCH_METRICS} Wait Until Keyword Succeeds 50 times 5s - ... User Can Fetch Number Of Requests Over Defined Time thanos_url=${THANOS_URL} thanos_token=${THANOS_TOKEN} + ... User Can Fetch Number Of Requests Over Defined Time + ... thanos_url=${THANOS_URL} thanos_token=${THANOS_TOKEN} ... model_name=${model_name} query_kind=single namespace=${test_namespace} period=5m exp_value=1 Delete Model Via UI ${model_name}