From c7a233080fcc912e1c6ffadcda480dd09c9c431e Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Fri, 16 Feb 2024 16:55:30 +0100 Subject: [PATCH 1/9] Add OVMS on Kserve GPU test Signed-off-by: Luca Giorgi --- .../ModelServer.resource | 14 +++++ .../425__model_serving_ovms_on_kserve.robot | 53 +++++++++++++++++-- 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource index 0a8939dd6..b17d3db1e 100644 --- a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource +++ b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource @@ -151,6 +151,20 @@ Verify Displayed GPU Count ${current_accs}= Get Text xpath://span[text()="${server_name}"]/../../../following-sibling::tr//td[@data-label]/div/dl/div[4]/dd/div Should Match ${current_accs} ${no_gpus} +Verify Displayed GPU Count In Single Model Serving + [Documentation] Verifies the number of GPUs displayed in the Model Server table of single model serving matches + ... the expected value + [Arguments] ${model_name} ${no_gpus} + Wait Until Page Contains Element xpath://td[@data-label="Name"]/div[text()="${model_name}"] + ${expanded}= Run Keyword And Return Status Page Should Contain Element + ... xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../..//button[@aria-expanded="true"] + IF ${expanded}==False + Click Element xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../..//button[@aria-expanded="true"] + END + Sleep 5s reason=Sometimes the number of current Accelerators take a few seconds to update + ${current_accs}= Get Text xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../../../tr[last()]//span[.="Number of accelerators"]/../../dd/div #robocop: disable + Should Match ${current_accs} ${no_gpus} + Set Model Server Runtime [Documentation] Selects a given Runtime for the model server [Arguments] ${runtime} diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot index e654b7e27..dfed472a0 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot @@ -27,9 +27,14 @@ ${SECURED_MODEL}= test-model-secured ${EXPECTED_INFERENCE_SECURED_OUTPUT}= {"model_name":"${SECURED_MODEL}__isvc-83d6fab7bd","model_version":"1","outputs":[{"name":"Plus214_Output_0","datatype":"FP32","shape":[1,10],"data":[-8.233053,-7.7497034,-3.4236815,12.3630295,-12.079103,17.266596,-10.570976,0.7130762,3.321715,1.3621228]}]} #robocop: disable ${EXPECTED_INFERENCE_OUTPUT_OPENVINO}= {"model_name":"${MODEL_NAME}__isvc-8655dc7979","model_version":"1","outputs":[{"name":"Func/StatefulPartitionedCall/output/_13:0","datatype":"FP32","shape":[1,1],"data":[0.99999994]}]} #robocop: disable +${PRJ_TITLE_GPU}= model-serving-project-gpu +${PRJ_DESCRIPTION_GPU}= project used for model serving tests (with GPUs) +${MODEL_NAME_GPU}= vehicle-detection +${MODEL_CREATED}= ${FALSE} +${RUNTIME_NAME_GPU}= Model Serving GPU Test *** Test Cases *** -Verify Openvino_IR Model Via UI (Kserve) +Verify Openvino_IR Model Via UI (OVMS on Kserve) [Documentation] Test the deployment of an openvino_ir model in Kserve using OVMS [Tags] Smoke ... ODS-2626 @@ -41,7 +46,7 @@ Verify Openvino_IR Model Via UI (Kserve) ... Run Keyword If Test Failed Get Kserve Events And Logs ... model_name=${MODEL_NAME} project_title=${PRJ_TITLE} -Verify Tensorflow Model Via UI (Kserve) +Verify Tensorflow Model Via UI (OVMS on Kserve) [Documentation] Test the deployment of a tensorflow (.pb) model in Kserve using OVMS [Tags] Sanity Tier1 ... ODS-2627 @@ -64,7 +69,7 @@ Verify Tensorflow Model Via UI (Kserve) ... Run Keyword If Test Failed Get Kserve Events And Logs ... model_name=${MODEL_NAME} project_title=${PRJ_TITLE} -Test Onnx Model Via UI (Kserve) +Test Onnx Model Via UI (OVMS on Kserve) [Documentation] Test the deployment of an onnx model in Kserve using OVMS [Tags] Sanity Tier1 ... ODS-2628 @@ -85,7 +90,7 @@ Test Onnx Model Via UI (Kserve) ... Run Keyword If Test Failed Get Kserve Events And Logs ... model_name=${SECURED_MODEL} project_title=${SECOND_PROJECT} -Verify Multiple Projects With Same Model (Kserve) +Verify Multiple Projects With Same Model (OVMS on Kserve) [Documentation] Test the deployment of multiple DS project with same openvino_ir model (kserve) [Tags] Sanity ... ODS-2629 RHOAIENG-549 @@ -95,6 +100,46 @@ Verify Multiple Projects With Same Model (Kserve) ... Run Keyword If Test Failed Get Kserve Events And Logs ... model_name=${MODEL_NAME} project_title=${PRJ_TITLE} +Verify GPU Model Deployment Via UI (OVMS on Kserve) + [Documentation] Test the deployment of an openvino_ir model on a model server with GPUs attached + [Tags] Sanity Tier1 Resources-GPU + ... ODS-XXXX + Clean All Models Of Current User + Open Data Science Projects Home Page + Wait For RHODS Dashboard To Load wait_for_cards=${FALSE} expected_page=Data Science Projects + Create Data Science Project title=${PRJ_TITLE_GPU} description=${PRJ_DESCRIPTION_GPU} + Create S3 Data Connection project_title=${PRJ_TITLE_GPU} dc_name=model-serving-connection + ... aws_access_key=${S3.AWS_ACCESS_KEY_ID} aws_secret_access=${S3.AWS_SECRET_ACCESS_KEY} + ... aws_bucket_name=ods-ci-s3 + Deploy Kserve Model Via UI model_name=${MODEL_NAME_GPU} serving_runtime=OpenVINO Model Server + ... data_connection=model-serving-connection path=vehicle-detection-kserve model_framework=openvino_ir + ... no_gpus=1 + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} + ... namespace=${PRJ_TITLE_GPU} + Verify Displayed GPU Count In Single Model Serving model_name=${MODEL_NAME_GPU} no_gpus=1 + ${requests} = Get Container Requests namespace=${PRJ_TITLE_GPU} + ... label=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} container_name=kserve-container + Should Contain ${requests} "nvidia.com/gpu": "1" + ${node} = Get Node Pod Is Running On namespace=${PRJ_TITLE_GPU} + ... label=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} + ${type} = Get Instance Type Of Node ${node} + Should Be Equal As Strings ${type} "g4dn.xlarge" + Verify Model Status ${MODEL_NAME_GPU} success + Set Suite Variable ${MODEL_CREATED} True + ${url} ${kserve}= Get Model Route via UI ${MODEL_NAME_GPU} + Send Random Inference Request endpoint=${url} no_requests=100 + # Verify metric DCGM_FI_PROF_GR_ENGINE_ACTIVE goes over 0 + ${prometheus_route}= Get OpenShift Prometheus Route + ${sa_token}= Get OpenShift Prometheus Service Account Token + ${expression}= Set Variable DCGM_FI_PROF_GR_ENGINE_ACTIVE + ${resp}= Prometheus.Run Query ${prometheus_route} ${sa_token} ${expression} + Log DCGM_FI_PROF_GR_ENGINE_ACTIVE: ${resp.json()["data"]["result"][0]["value"][-1]} + Should Be True ${resp.json()["data"]["result"][0]["value"][-1]} > ${0} + [Teardown] Run Keywords Clean All Models Of Current User AND + ... Run Keyword If Test Failed Get Kserve Events And Logs + ... model_name=${MODEL_NAME_GPU} project_title=${PRJ_TITLE_GPU} + + *** Keywords *** OVMS On Kserve Suite Setup [Documentation] Suite setup steps for testing DSG. It creates some test variables From 5f0e167c6560233dedd1ee827e3d6d6a47a0b7bc Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Fri, 16 Feb 2024 17:00:22 +0100 Subject: [PATCH 2/9] add missing keywords Signed-off-by: Luca Giorgi --- .../425__model_serving_ovms_on_kserve.robot | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot index dfed472a0..de80f0d84 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot @@ -193,3 +193,15 @@ OVMS On Kserve Suite Teardown Remove File openshift_ca_istio_knative.crt SeleniumLibrary.Close All Browsers RHOSi Teardown + +Get OpenShift Prometheus Route + [Documentation] Fetches the route for the Prometheus instance of openshift-monitoring + ${host}= Run oc get route prometheus-k8s -n openshift-monitoring -o json | jq '.status.ingress[].host' + ${host}= Strip String ${host} characters=" + ${route}= Catenate SEPARATOR= https:// ${host} + RETURN ${route} + +Get OpenShift Prometheus Service Account Token + [Documentation] Returns a token for a service account to be used with Prometheus + ${token}= Run oc create token prometheus-k8s -n openshift-monitoring --duration 10m + RETURN ${token} From e36dc7e4fc3e9a686671d2e04b8a51da74cec624 Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Fri, 16 Feb 2024 17:02:20 +0100 Subject: [PATCH 3/9] fix variables typo Signed-off-by: Luca Giorgi --- .../420__model_serving/425__model_serving_ovms_on_kserve.robot | 2 -- 1 file changed, 2 deletions(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot index de80f0d84..3ea2044a2 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot @@ -26,11 +26,9 @@ ${SECOND_PROJECT}= sec-model-serving-project ${SECURED_MODEL}= test-model-secured ${EXPECTED_INFERENCE_SECURED_OUTPUT}= {"model_name":"${SECURED_MODEL}__isvc-83d6fab7bd","model_version":"1","outputs":[{"name":"Plus214_Output_0","datatype":"FP32","shape":[1,10],"data":[-8.233053,-7.7497034,-3.4236815,12.3630295,-12.079103,17.266596,-10.570976,0.7130762,3.321715,1.3621228]}]} #robocop: disable ${EXPECTED_INFERENCE_OUTPUT_OPENVINO}= {"model_name":"${MODEL_NAME}__isvc-8655dc7979","model_version":"1","outputs":[{"name":"Func/StatefulPartitionedCall/output/_13:0","datatype":"FP32","shape":[1,1],"data":[0.99999994]}]} #robocop: disable - ${PRJ_TITLE_GPU}= model-serving-project-gpu ${PRJ_DESCRIPTION_GPU}= project used for model serving tests (with GPUs) ${MODEL_NAME_GPU}= vehicle-detection -${MODEL_CREATED}= ${FALSE} ${RUNTIME_NAME_GPU}= Model Serving GPU Test *** Test Cases *** From d61f435b14a6272ab0ff7d3ff3ff5c686610d406 Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Fri, 16 Feb 2024 17:03:33 +0100 Subject: [PATCH 4/9] remove unused variable Signed-off-by: Luca Giorgi --- .../420__model_serving/425__model_serving_ovms_on_kserve.robot | 1 - 1 file changed, 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot index 3ea2044a2..6957d2815 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot @@ -29,7 +29,6 @@ ${EXPECTED_INFERENCE_OUTPUT_OPENVINO}= {"model_name":"${MODEL_NAME}__isvc-865 ${PRJ_TITLE_GPU}= model-serving-project-gpu ${PRJ_DESCRIPTION_GPU}= project used for model serving tests (with GPUs) ${MODEL_NAME_GPU}= vehicle-detection -${RUNTIME_NAME_GPU}= Model Serving GPU Test *** Test Cases *** Verify Openvino_IR Model Via UI (OVMS on Kserve) From 8cecdb21f2cc359820fd87442e40315d3b57462d Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Fri, 16 Feb 2024 17:20:17 +0100 Subject: [PATCH 5/9] robocop warnings Signed-off-by: Luca Giorgi --- .../ODHDataScienceProject/ModelServer.resource | 10 ++++++---- .../425__model_serving_ovms_on_kserve.robot | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource index b17d3db1e..936c1be8e 100644 --- a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource +++ b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource @@ -155,14 +155,16 @@ Verify Displayed GPU Count In Single Model Serving [Documentation] Verifies the number of GPUs displayed in the Model Server table of single model serving matches ... the expected value [Arguments] ${model_name} ${no_gpus} - Wait Until Page Contains Element xpath://td[@data-label="Name"]/div[text()="${model_name}"] + SeleniumLibrary.Wait Until Page Contains Element xpath://td[@data-label="Name"]/div[text()="${model_name}"] ${expanded}= Run Keyword And Return Status Page Should Contain Element ... xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../..//button[@aria-expanded="true"] - IF ${expanded}==False - Click Element xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../..//button[@aria-expanded="true"] + IF not ${expanded} + SeleniumLibrary.Click Element + ... xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../..//button[@aria-expanded="true"] END Sleep 5s reason=Sometimes the number of current Accelerators take a few seconds to update - ${current_accs}= Get Text xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../../../tr[last()]//span[.="Number of accelerators"]/../../dd/div #robocop: disable + ${current_accs}= SeleniumLibrary.Get Text + ... xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../../../tr[last()]//span[.="Number of accelerators"]/../../dd/div #robocop: disable Should Match ${current_accs} ${no_gpus} Set Model Server Runtime diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot index 6957d2815..54aebf938 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot @@ -114,12 +114,12 @@ Verify GPU Model Deployment Via UI (OVMS on Kserve) Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} ... namespace=${PRJ_TITLE_GPU} Verify Displayed GPU Count In Single Model Serving model_name=${MODEL_NAME_GPU} no_gpus=1 - ${requests} = Get Container Requests namespace=${PRJ_TITLE_GPU} + ${requests}= Get Container Requests namespace=${PRJ_TITLE_GPU} ... label=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} container_name=kserve-container Should Contain ${requests} "nvidia.com/gpu": "1" - ${node} = Get Node Pod Is Running On namespace=${PRJ_TITLE_GPU} + ${node}= Get Node Pod Is Running On namespace=${PRJ_TITLE_GPU} ... label=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} - ${type} = Get Instance Type Of Node ${node} + ${type}= Get Instance Type Of Node ${node} Should Be Equal As Strings ${type} "g4dn.xlarge" Verify Model Status ${MODEL_NAME_GPU} success Set Suite Variable ${MODEL_CREATED} True From 69801057ba038f73b756121445a15afbf22f32d1 Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Fri, 16 Feb 2024 18:08:48 +0100 Subject: [PATCH 6/9] fix keyword bug Signed-off-by: Luca Giorgi --- .../ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource index 936c1be8e..fee5c2798 100644 --- a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource +++ b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource @@ -160,7 +160,7 @@ Verify Displayed GPU Count In Single Model Serving ... xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../..//button[@aria-expanded="true"] IF not ${expanded} SeleniumLibrary.Click Element - ... xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../..//button[@aria-expanded="true"] + ... xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../..//button[@aria-expanded="false"] END Sleep 5s reason=Sometimes the number of current Accelerators take a few seconds to update ${current_accs}= SeleniumLibrary.Get Text From c36e1b945b15de664fff7ff26d9c206db845e4b9 Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Fri, 16 Feb 2024 18:50:40 +0100 Subject: [PATCH 7/9] Fetch knative CA if needed Signed-off-by: Luca Giorgi --- .../425__model_serving_ovms_on_kserve.robot | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot index 54aebf938..7838b2531 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot @@ -59,6 +59,9 @@ Verify Tensorflow Model Via UI (OVMS on Kserve) Verify Model Status ${MODEL_NAME} success Set Suite Variable ${MODEL_CREATED} ${TRUE} ${url} ${kserve}= Get Model Route via UI ${MODEL_NAME} + IF ${kserve} + Fetch Knative CA Certificate filename=openshift_ca_istio_knative.crt + END ${status_code} ${response_text}= Send Random Inference Request endpoint=${url} name=input ... shape={"B": 1, "H": 299, "W": 299, "C": 3} no_requests=1 Should Be Equal As Strings ${status_code} 200 @@ -124,6 +127,9 @@ Verify GPU Model Deployment Via UI (OVMS on Kserve) Verify Model Status ${MODEL_NAME_GPU} success Set Suite Variable ${MODEL_CREATED} True ${url} ${kserve}= Get Model Route via UI ${MODEL_NAME_GPU} + IF ${kserve} + Fetch Knative CA Certificate filename=openshift_ca_istio_knative.crt + END Send Random Inference Request endpoint=${url} no_requests=100 # Verify metric DCGM_FI_PROF_GR_ENGINE_ACTIVE goes over 0 ${prometheus_route}= Get OpenShift Prometheus Route From f2928b8d5f0d119552667727215df982eb7de652 Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Mon, 19 Feb 2024 17:17:00 +0100 Subject: [PATCH 8/9] addressing comments Signed-off-by: Luca Giorgi --- .../Page/ODH/Monitoring/Monitoring.resource | 12 +++++++ .../ODH/ODHDashboard/ODHModelServing.resource | 6 ++-- .../420__model_serving.robot | 4 +-- .../421__model_serving_gpu.robot | 15 ++------ .../425__model_serving_ovms_on_kserve.robot | 34 +++++-------------- 5 files changed, 28 insertions(+), 43 deletions(-) diff --git a/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource b/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource index 5507240da..7e026d1ad 100644 --- a/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource +++ b/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource @@ -181,3 +181,15 @@ Metrics Should Exist In UserWorkloadMonitoring Append To List ${metrics} @{metrics_names} END RETURN ${metrics} + +Get OpenShift Prometheus Route + [Documentation] Fetches the route for the Prometheus instance of openshift-monitoring + ${host}= Run oc get route prometheus-k8s -n openshift-monitoring -o json | jq '.status.ingress[].host' + ${host}= Strip String ${host} characters=" + ${route}= Catenate SEPARATOR= https:// ${host} + RETURN ${route} + +Get OpenShift Prometheus Service Account Token + [Documentation] Returns a token for a service account to be used with Prometheus + ${token}= Run oc create token prometheus-k8s -n openshift-monitoring --duration 10m + RETURN ${token} diff --git a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHModelServing.resource b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHModelServing.resource index 458775e5d..a2c8bc8f5 100644 --- a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHModelServing.resource +++ b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHModelServing.resource @@ -202,7 +202,7 @@ Open Model Edit Modal ODHDashboard.Click Action From Actions Menu item_title=${model_name} action=Edit Wait Until Page Contains xpath://h1[.="Deploy model"] -Get Model Route via UI +Get Model Route Via UI [Documentation] Grabs the serving route (URL) of an already deployed model from the Model Serving page. [Arguments] ${model_name} # TODO: Open model serving home page if needed? @@ -218,7 +218,7 @@ Get Model Route via UI IF ${kserve} ${url}= Catenate SEPARATOR= ${url} /v2/models/${model_name}/infer END - RETURN ${url} ${kserve} + RETURN ${url} Get Model Route Via CLI [Documentation] Grabs the serving route (URL) of an already deployed model from CLI. @@ -262,7 +262,7 @@ Get Model Inference [Arguments] ${model_name} ${inference_input} ${token_auth}=${FALSE} ... ${project_title}=${NONE} ${self_managed}= Is RHODS Self-Managed - ${url} ${kserve}= Get Model Route via UI ${model_name} + ${url}= Get Model Route Via UI ${model_name} ${curl_cmd}= Set Variable curl -s ${url} -d ${inference_input} IF ${token_auth} IF "${project_title}" == "${NONE}" diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/420__model_serving.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/420__model_serving.robot index 594ca3513..420d5ef36 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/420__model_serving.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/420__model_serving.robot @@ -77,7 +77,7 @@ Verify Tensorflow Model Via UI Wait Until Keyword Succeeds 5 min 10 sec Verify Serving Service Verify Model Status ${MODEL_NAME} success Set Suite Variable ${MODEL_CREATED} ${TRUE} - ${url} ${kserve}= Get Model Route via UI ${MODEL_NAME} + ${url}= Get Model Route Via UI ${MODEL_NAME} ${status_code} ${response_text}= Send Random Inference Request endpoint=${url} name=input ... shape={"B": 1, "H": 299, "W": 299, "C": 3} no_requests=1 Should Be Equal As Strings ${status_code} 200 @@ -159,7 +159,7 @@ Verify Editing Existing Model Deployment Wait Until Keyword Succeeds 5 min 10 sec Verify Serving Service Verify Model Status ${MODEL_NAME} success Set Suite Variable ${MODEL_CREATED} ${TRUE} - ${url} ${kserve}= Get Model Route via UI ${MODEL_NAME} + ${url}= Get Model Route Via UI ${MODEL_NAME} ${status_code} ${response_text}= Send Random Inference Request endpoint=${url} name=input ... shape={"B": 1, "H": 299, "W": 299, "C": 3} no_requests=1 Should Be Equal As Strings ${status_code} 200 diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/421__model_serving_gpu.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/421__model_serving_gpu.robot index 6b6ecf480..677d84d35 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/421__model_serving_gpu.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/421__model_serving_gpu.robot @@ -6,6 +6,7 @@ Resource ../../../Resources/Page/ODH/ODHDashboard/ODHModelServing.resou Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/DataConnections.resource Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource +Resource ../../../Resources/Page/ODH/Monitoring/Monitoring.resource Resource ../../../Resources/CLI/ModelServing/modelmesh.resource Suite Setup Model Serving Suite Setup Suite Teardown Model Serving Suite Teardown @@ -58,7 +59,7 @@ Test Inference Load On GPU [Documentation] Test the inference load on the GPU after sending random requests to the endpoint [Tags] Sanity Tier1 Resources-GPU ... ODS-2213 - ${url} ${kserve}= Get Model Route via UI ${MODEL_NAME} + ${url}= Get Model Route Via UI ${MODEL_NAME} Send Random Inference Request endpoint=${url} no_requests=100 # Verify metric DCGM_FI_PROF_GR_ENGINE_ACTIVE goes over 0 ${prometheus_route}= Get OpenShift Prometheus Route @@ -94,15 +95,3 @@ Model Serving Suite Teardown END Close All Browsers RHOSi Teardown - -Get OpenShift Prometheus Route - [Documentation] Fetches the route for the Prometheus instance of openshift-monitoring - ${host}= Run oc get route prometheus-k8s -n openshift-monitoring -o json | jq '.status.ingress[].host' - ${host}= Strip String ${host} characters=" - ${route}= Catenate SEPARATOR= https:// ${host} - RETURN ${route} - -Get OpenShift Prometheus Service Account Token - [Documentation] Returns a token for a service account to be used with Prometheus - ${token}= Run oc create token prometheus-k8s -n openshift-monitoring --duration 10m - RETURN ${token} diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot index 7838b2531..16c797e7b 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot @@ -7,6 +7,7 @@ Resource ../../../Resources/Page/ODH/ODHDashboard/ODHModelServing.resou Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/DataConnections.resource Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource +Resource ../../../Resources/Page/ODH/Monitoring/Monitoring.resource Resource ../../../Resources/OCP.resource Resource ../../../Resources/CLI/ModelServing/modelmesh.resource Suite Setup OVMS On Kserve Suite Setup @@ -58,10 +59,7 @@ Verify Tensorflow Model Via UI (OVMS on Kserve) ... namespace=${PRJ_TITLE} Verify Model Status ${MODEL_NAME} success Set Suite Variable ${MODEL_CREATED} ${TRUE} - ${url} ${kserve}= Get Model Route via UI ${MODEL_NAME} - IF ${kserve} - Fetch Knative CA Certificate filename=openshift_ca_istio_knative.crt - END + ${url}= Get Model Route Via UI ${MODEL_NAME} ${status_code} ${response_text}= Send Random Inference Request endpoint=${url} name=input ... shape={"B": 1, "H": 299, "W": 299, "C": 3} no_requests=1 Should Be Equal As Strings ${status_code} 200 @@ -103,7 +101,8 @@ Verify Multiple Projects With Same Model (OVMS on Kserve) Verify GPU Model Deployment Via UI (OVMS on Kserve) [Documentation] Test the deployment of an openvino_ir model on a model server with GPUs attached [Tags] Sanity Tier1 Resources-GPU - ... ODS-XXXX + ... ODS-2630 ODS-2631 ProductBug RHOAIENG-3355 + ${requests}= Create Dictionary nvidia.com/gpu=1 Clean All Models Of Current User Open Data Science Projects Home Page Wait For RHODS Dashboard To Load wait_for_cards=${FALSE} expected_page=Data Science Projects @@ -117,19 +116,16 @@ Verify GPU Model Deployment Via UI (OVMS on Kserve) Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} ... namespace=${PRJ_TITLE_GPU} Verify Displayed GPU Count In Single Model Serving model_name=${MODEL_NAME_GPU} no_gpus=1 - ${requests}= Get Container Requests namespace=${PRJ_TITLE_GPU} - ... label=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} container_name=kserve-container - Should Contain ${requests} "nvidia.com/gpu": "1" + Container Hardware Resources Should Match Expected container_name=kserve-container + ... pod_label_selector=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} + ... namespace=${PRJ_TITLE_GPU} exp_requests=${requests} ${node}= Get Node Pod Is Running On namespace=${PRJ_TITLE_GPU} ... label=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} ${type}= Get Instance Type Of Node ${node} Should Be Equal As Strings ${type} "g4dn.xlarge" Verify Model Status ${MODEL_NAME_GPU} success Set Suite Variable ${MODEL_CREATED} True - ${url} ${kserve}= Get Model Route via UI ${MODEL_NAME_GPU} - IF ${kserve} - Fetch Knative CA Certificate filename=openshift_ca_istio_knative.crt - END + ${url}= Get Model Route Via UI ${MODEL_NAME_GPU} Send Random Inference Request endpoint=${url} no_requests=100 # Verify metric DCGM_FI_PROF_GR_ENGINE_ACTIVE goes over 0 ${prometheus_route}= Get OpenShift Prometheus Route @@ -152,7 +148,7 @@ OVMS On Kserve Suite Setup RHOSi Setup Launch Dashboard ${TEST_USER.USERNAME} ${TEST_USER.PASSWORD} ${TEST_USER.AUTH_TYPE} ... ${ODH_DASHBOARD_URL} ${BROWSER.NAME} ${BROWSER.OPTIONS} - Fetch CA Certificate If RHODS Is Self-Managed + Fetch Knative CA Certificate filename=openshift_ca_istio_knative.crt Clean All Models Of Current User Create Openvino Models For Kserve @@ -196,15 +192,3 @@ OVMS On Kserve Suite Teardown Remove File openshift_ca_istio_knative.crt SeleniumLibrary.Close All Browsers RHOSi Teardown - -Get OpenShift Prometheus Route - [Documentation] Fetches the route for the Prometheus instance of openshift-monitoring - ${host}= Run oc get route prometheus-k8s -n openshift-monitoring -o json | jq '.status.ingress[].host' - ${host}= Strip String ${host} characters=" - ${route}= Catenate SEPARATOR= https:// ${host} - RETURN ${route} - -Get OpenShift Prometheus Service Account Token - [Documentation] Returns a token for a service account to be used with Prometheus - ${token}= Run oc create token prometheus-k8s -n openshift-monitoring --duration 10m - RETURN ${token} From cade864f5044f5cddc2a398c32db36b77a7329ef Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Mon, 19 Feb 2024 17:42:20 +0100 Subject: [PATCH 9/9] Fix limit check in container Signed-off-by: Luca Giorgi --- .../420__model_serving/425__model_serving_ovms_on_kserve.robot | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot index 16c797e7b..cfb2a6f24 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/425__model_serving_ovms_on_kserve.robot @@ -103,6 +103,7 @@ Verify GPU Model Deployment Via UI (OVMS on Kserve) [Tags] Sanity Tier1 Resources-GPU ... ODS-2630 ODS-2631 ProductBug RHOAIENG-3355 ${requests}= Create Dictionary nvidia.com/gpu=1 + ${limits}= Create Dictionary nvidia.com/gpu=1 Clean All Models Of Current User Open Data Science Projects Home Page Wait For RHODS Dashboard To Load wait_for_cards=${FALSE} expected_page=Data Science Projects @@ -118,7 +119,7 @@ Verify GPU Model Deployment Via UI (OVMS on Kserve) Verify Displayed GPU Count In Single Model Serving model_name=${MODEL_NAME_GPU} no_gpus=1 Container Hardware Resources Should Match Expected container_name=kserve-container ... pod_label_selector=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} - ... namespace=${PRJ_TITLE_GPU} exp_requests=${requests} + ... namespace=${PRJ_TITLE_GPU} exp_requests=${requests} exp_limits=${limits} ${node}= Get Node Pod Is Running On namespace=${PRJ_TITLE_GPU} ... label=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU} ${type}= Get Instance Type Of Node ${node}