
Add OVMS on Kserve GPU test #1212

Merged (13 commits) on Feb 20, 2024
12 changes: 12 additions & 0 deletions ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource
@@ -181,3 +181,15 @@ Metrics Should Exist In UserWorkloadMonitoring
Append To List ${metrics} @{metrics_names}
END
RETURN ${metrics}

Get OpenShift Prometheus Route
[Documentation] Fetches the route for the Prometheus instance of openshift-monitoring
${host}= Run oc get route prometheus-k8s -n openshift-monitoring -o json | jq '.status.ingress[].host'
${host}= Strip String ${host} characters="
${route}= Catenate SEPARATOR= https:// ${host}
RETURN ${route}

Get OpenShift Prometheus Service Account Token
[Documentation] Returns a token for a service account to be used with Prometheus
${token}= Run oc create token prometheus-k8s -n openshift-monitoring --duration 10m
RETURN ${token}
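The two keywords above produce the route and a bearer token that the tests later feed to a Prometheus instant query. A minimal sketch of how those pieces typically combine into a request, outside Robot Framework; the route and token values here are placeholders, not real cluster data:

```python
from urllib.parse import urlencode

def build_prometheus_query(route: str, token: str, expression: str):
    """Return (url, headers) for a Prometheus instant query."""
    # Prometheus serves instant queries at /api/v1/query?query=<expr>
    url = f"{route}/api/v1/query?{urlencode({'query': expression})}"
    # The service-account token is sent as a Bearer token
    headers = {"Authorization": f"Bearer {token}"}
    return url, headers

url, headers = build_prometheus_query(
    "https://prometheus-k8s-openshift-monitoring.apps.example.com",  # placeholder route
    "sha256~abc123",  # placeholder token
    "DCGM_FI_PROF_GR_ENGINE_ACTIVE",
)
```

The actual HTTP call in the tests goes through the suite's Prometheus resource (`Prometheus.Run Query`); this only illustrates the URL and auth-header shape that keyword is assumed to build.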
@@ -151,6 +151,22 @@ Verify Displayed GPU Count
${current_accs}= Get Text xpath://span[text()="${server_name}"]/../../../following-sibling::tr//td[@data-label]/div/dl/div[4]/dd/div
Should Match ${current_accs} ${no_gpus}

Verify Displayed GPU Count In Single Model Serving
[Documentation] Verifies the number of GPUs displayed in the Model Server table of single model serving matches
... the expected value
[Arguments] ${model_name} ${no_gpus}
SeleniumLibrary.Wait Until Page Contains Element xpath://td[@data-label="Name"]/div[text()="${model_name}"]
${expanded}= Run Keyword And Return Status Page Should Contain Element
... xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../..//button[@aria-expanded="true"]
IF not ${expanded}
SeleniumLibrary.Click Element
... xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../..//button[@aria-expanded="false"]
END
Sleep 5s reason=Sometimes the number of current accelerators takes a few seconds to update
${current_accs}= SeleniumLibrary.Get Text
... xpath://td[@data-label="Name"]/div[text()="${model_name}"]/../../../tr[last()]//span[.="Number of accelerators"]/../../dd/div #robocop: disable
Should Match ${current_accs} ${no_gpus}

Set Model Server Runtime
[Documentation] Selects a given Runtime for the model server
[Arguments] ${runtime}
@@ -202,7 +202,7 @@ Open Model Edit Modal
ODHDashboard.Click Action From Actions Menu item_title=${model_name} action=Edit
Wait Until Page Contains xpath://h1[.="Deploy model"]

Get Model Route via UI
Get Model Route Via UI
[Documentation] Grabs the serving route (URL) of an already deployed model from the Model Serving page.
[Arguments] ${model_name}
# TODO: Open model serving home page if needed?
@@ -218,7 +218,7 @@ Get Model Route via UI
IF ${kserve}
${url}= Catenate SEPARATOR= ${url} /v2/models/${model_name}/infer
END
RETURN ${url} ${kserve}
RETURN ${url}

Get Model Route Via CLI
[Documentation] Grabs the serving route (URL) of an already deployed model from CLI.
@@ -262,7 +262,7 @@ Get Model Inference
[Arguments] ${model_name} ${inference_input} ${token_auth}=${FALSE}
... ${project_title}=${NONE}
${self_managed}= Is RHODS Self-Managed
${url} ${kserve}= Get Model Route via UI ${model_name}
${url}= Get Model Route Via UI ${model_name}
${curl_cmd}= Set Variable curl -s ${url} -d ${inference_input}
IF ${token_auth}
IF "${project_title}" == "${NONE}"
@@ -77,7 +77,7 @@ Verify Tensorflow Model Via UI
Wait Until Keyword Succeeds 5 min 10 sec Verify Serving Service
Verify Model Status ${MODEL_NAME} success
Set Suite Variable ${MODEL_CREATED} ${TRUE}
${url} ${kserve}= Get Model Route via UI ${MODEL_NAME}
${url}= Get Model Route Via UI ${MODEL_NAME}
${status_code} ${response_text}= Send Random Inference Request endpoint=${url} name=input
... shape={"B": 1, "H": 299, "W": 299, "C": 3} no_requests=1
Should Be Equal As Strings ${status_code} 200
@@ -159,7 +159,7 @@ Verify Editing Existing Model Deployment
Wait Until Keyword Succeeds 5 min 10 sec Verify Serving Service
Verify Model Status ${MODEL_NAME} success
Set Suite Variable ${MODEL_CREATED} ${TRUE}
${url} ${kserve}= Get Model Route via UI ${MODEL_NAME}
${url}= Get Model Route Via UI ${MODEL_NAME}
${status_code} ${response_text}= Send Random Inference Request endpoint=${url} name=input
... shape={"B": 1, "H": 299, "W": 299, "C": 3} no_requests=1
Should Be Equal As Strings ${status_code} 200
@@ -6,6 +6,7 @@ Resource ../../../Resources/Page/ODH/ODHDashboard/ODHModelServing.resou
Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource
Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/DataConnections.resource
Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource
Resource ../../../Resources/Page/ODH/Monitoring/Monitoring.resource
Resource ../../../Resources/CLI/ModelServing/modelmesh.resource
Suite Setup Model Serving Suite Setup
Suite Teardown Model Serving Suite Teardown
@@ -58,7 +59,7 @@ Test Inference Load On GPU
[Documentation] Test the inference load on the GPU after sending random requests to the endpoint
[Tags] Sanity Tier1 Resources-GPU
... ODS-2213
${url} ${kserve}= Get Model Route via UI ${MODEL_NAME}
${url}= Get Model Route Via UI ${MODEL_NAME}
Send Random Inference Request endpoint=${url} no_requests=100
# Verify metric DCGM_FI_PROF_GR_ENGINE_ACTIVE goes over 0
${prometheus_route}= Get OpenShift Prometheus Route
@@ -94,15 +95,3 @@
END
Close All Browsers
RHOSi Teardown

Get OpenShift Prometheus Route
[Documentation] Fetches the route for the Prometheus instance of openshift-monitoring
${host}= Run oc get route prometheus-k8s -n openshift-monitoring -o json | jq '.status.ingress[].host'
${host}= Strip String ${host} characters="
${route}= Catenate SEPARATOR= https:// ${host}
RETURN ${route}

Get OpenShift Prometheus Service Account Token
[Documentation] Returns a token for a service account to be used with Prometheus
${token}= Run oc create token prometheus-k8s -n openshift-monitoring --duration 10m
RETURN ${token}
@@ -7,6 +7,7 @@
Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource
Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/DataConnections.resource
Resource ../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource
Resource ../../../Resources/Page/ODH/Monitoring/Monitoring.resource
Resource ../../../Resources/OCP.resource
Resource ../../../Resources/CLI/ModelServing/modelmesh.resource
Suite Setup OVMS On Kserve Suite Setup
@@ -26,10 +27,12 @@
${SECURED_MODEL}= test-model-secured
${EXPECTED_INFERENCE_SECURED_OUTPUT}= {"model_name":"${SECURED_MODEL}__isvc-83d6fab7bd","model_version":"1","outputs":[{"name":"Plus214_Output_0","datatype":"FP32","shape":[1,10],"data":[-8.233053,-7.7497034,-3.4236815,12.3630295,-12.079103,17.266596,-10.570976,0.7130762,3.321715,1.3621228]}]} #robocop: disable
${EXPECTED_INFERENCE_OUTPUT_OPENVINO}= {"model_name":"${MODEL_NAME}__isvc-8655dc7979","model_version":"1","outputs":[{"name":"Func/StatefulPartitionedCall/output/_13:0","datatype":"FP32","shape":[1,1],"data":[0.99999994]}]} #robocop: disable

${PRJ_TITLE_GPU}= model-serving-project-gpu
${PRJ_DESCRIPTION_GPU}= project used for model serving tests (with GPUs)
${MODEL_NAME_GPU}= vehicle-detection

*** Test Cases ***
Verify Openvino_IR Model Via UI (Kserve)
Verify Openvino_IR Model Via UI (OVMS on Kserve)
[Documentation] Test the deployment of an openvino_ir model in Kserve using OVMS
[Tags] Smoke
... ODS-2626
@@ -41,7 +44,7 @@
... Run Keyword If Test Failed Get Kserve Events And Logs
... model_name=${MODEL_NAME} project_title=${PRJ_TITLE}

Verify Tensorflow Model Via UI (Kserve)
Verify Tensorflow Model Via UI (OVMS on Kserve)
[Documentation] Test the deployment of a tensorflow (.pb) model in Kserve using OVMS
[Tags] Sanity Tier1
... ODS-2627
@@ -56,15 +59,15 @@
... namespace=${PRJ_TITLE}
Verify Model Status ${MODEL_NAME} success
Set Suite Variable ${MODEL_CREATED} ${TRUE}
${url} ${kserve}= Get Model Route via UI ${MODEL_NAME}
${url}= Get Model Route Via UI ${MODEL_NAME}
${status_code} ${response_text}= Send Random Inference Request endpoint=${url} name=input
... shape={"B": 1, "H": 299, "W": 299, "C": 3} no_requests=1
Should Be Equal As Strings ${status_code} 200
[Teardown] Run Keywords Clean All Models Of Current User AND
... Run Keyword If Test Failed Get Kserve Events And Logs
... model_name=${MODEL_NAME} project_title=${PRJ_TITLE}

Test Onnx Model Via UI (Kserve)
Test Onnx Model Via UI (OVMS on Kserve)
[Documentation] Test the deployment of an onnx model in Kserve using OVMS
[Tags] Sanity Tier1
... ODS-2628
@@ -85,7 +88,7 @@
... Run Keyword If Test Failed Get Kserve Events And Logs
... model_name=${SECURED_MODEL} project_title=${SECOND_PROJECT}

Verify Multiple Projects With Same Model (Kserve)
Verify Multiple Projects With Same Model (OVMS on Kserve)
[Documentation] Test the deployment of multiple DS project with same openvino_ir model (kserve)
[Tags] Sanity
... ODS-2629 RHOAIENG-549
@@ -95,6 +98,47 @@
... Run Keyword If Test Failed Get Kserve Events And Logs
... model_name=${MODEL_NAME} project_title=${PRJ_TITLE}

Verify GPU Model Deployment Via UI (OVMS on Kserve)
[Documentation] Test the deployment of an openvino_ir model on a model server with GPUs attached
[Tags] Sanity Tier1 Resources-GPU
... ODS-2630 ODS-2631 ProductBug RHOAIENG-3355
${requests}= Create Dictionary nvidia.com/gpu=1
Clean All Models Of Current User
Open Data Science Projects Home Page
Wait For RHODS Dashboard To Load wait_for_cards=${FALSE} expected_page=Data Science Projects
Create Data Science Project title=${PRJ_TITLE_GPU} description=${PRJ_DESCRIPTION_GPU}
Create S3 Data Connection project_title=${PRJ_TITLE_GPU} dc_name=model-serving-connection
... aws_access_key=${S3.AWS_ACCESS_KEY_ID} aws_secret_access=${S3.AWS_SECRET_ACCESS_KEY}
... aws_bucket_name=ods-ci-s3
Deploy Kserve Model Via UI model_name=${MODEL_NAME_GPU} serving_runtime=OpenVINO Model Server
... data_connection=model-serving-connection path=vehicle-detection-kserve model_framework=openvino_ir
... no_gpus=1
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU}
... namespace=${PRJ_TITLE_GPU}
Verify Displayed GPU Count In Single Model Serving model_name=${MODEL_NAME_GPU} no_gpus=1
Container Hardware Resources Should Match Expected container_name=kserve-container
... pod_label_selector=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU}
... namespace=${PRJ_TITLE_GPU} exp_requests=${requests}
${node}= Get Node Pod Is Running On namespace=${PRJ_TITLE_GPU}
... label=serving.kserve.io/inferenceservice=${MODEL_NAME_GPU}
${type}= Get Instance Type Of Node ${node}
Should Be Equal As Strings ${type} "g4dn.xlarge"
Review thread on the instance-type assertion:

Contributor: Is there a way to check the node has a GPU without pinning to the node title? We may run on different GPU types.

Contributor (author): The only other option I can think of is looking at the labels, but that is dependent on the NFD operator / NVIDIA label, which wouldn't work for other accelerator types.

Contributor: We'll need to make the test compatible with different GPU node types. I think it can be done in another PR once we understand how.

Member: If we use any other GPU this test will fail, correct?

Contributor (author): Yes, as I said in another comment above, the only other option I can think of is looking at the labels, but that is dependent on the NFD operator / NVIDIA label, which wouldn't work for other accelerator types.
Verify Model Status ${MODEL_NAME_GPU} success
Set Suite Variable ${MODEL_CREATED} ${TRUE}
${url}= Get Model Route Via UI ${MODEL_NAME_GPU}
Send Random Inference Request endpoint=${url} no_requests=100
# Verify metric DCGM_FI_PROF_GR_ENGINE_ACTIVE goes over 0
${prometheus_route}= Get OpenShift Prometheus Route
${sa_token}= Get OpenShift Prometheus Service Account Token
${expression}= Set Variable DCGM_FI_PROF_GR_ENGINE_ACTIVE
${resp}= Prometheus.Run Query ${prometheus_route} ${sa_token} ${expression}
Log DCGM_FI_PROF_GR_ENGINE_ACTIVE: ${resp.json()["data"]["result"][0]["value"][-1]}
Should Be True ${resp.json()["data"]["result"][0]["value"][-1]} > ${0}
[Teardown] Run Keywords Clean All Models Of Current User AND
... Run Keyword If Test Failed Get Kserve Events And Logs
... model_name=${MODEL_NAME_GPU} project_title=${PRJ_TITLE_GPU}
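The DCGM assertion in this test indexes into the Prometheus response as `resp.json()["data"]["result"][0]["value"][-1]`. A small sketch of why that path yields the metric value, using a fabricated sample payload (real values come from the cluster's DCGM exporter):

```python
import json

# Fabricated instant-query response in the Prometheus v1 API shape
sample = json.loads("""
{"status": "success",
 "data": {"resultType": "vector",
          "result": [{"metric": {"__name__": "DCGM_FI_PROF_GR_ENGINE_ACTIVE"},
                      "value": [1708444800.0, "0.42"]}]}}
""")

# Each result's "value" is a [timestamp, value] pair; the value itself
# is a string, so it must be converted before numeric comparison.
value = sample["data"]["result"][0]["value"][-1]
print(float(value) > 0)  # the test asserts the GPU engine was active
```

Note that the test's `Should Be True ... > ${0}` comparison relies on Robot Framework coercing that string value; converting explicitly, as above, is the safer pattern.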


*** Keywords ***
OVMS On Kserve Suite Setup
[Documentation] Suite setup steps for testing DSG. It creates some test variables
@@ -104,7 +148,7 @@
RHOSi Setup
Launch Dashboard ${TEST_USER.USERNAME} ${TEST_USER.PASSWORD} ${TEST_USER.AUTH_TYPE}
... ${ODH_DASHBOARD_URL} ${BROWSER.NAME} ${BROWSER.OPTIONS}
Fetch CA Certificate If RHODS Is Self-Managed
Fetch Knative CA Certificate filename=openshift_ca_istio_knative.crt
Clean All Models Of Current User

Create Openvino Models For Kserve