DIR SPD tests for Trusty AI (#1088)

Verifying SPD and DIR metrics for CLI
red-hat-data-services · Jan 25, 2024 · 30a52f4 · 30a52f4
2 parents ec1005c + ee9b607
commit 30a52f4
Show file tree

Hide file tree

Showing 26 changed files with 333 additions and 51 deletions.
diff --git a/ods_ci/tests/Resources/Common.robot b/ods_ci/tests/Resources/Common.robot
@@ -13,6 +13,8 @@ Resource  RHOSi.resource
 
 *** Variables ***
 @{DEFAULT_CHARS_TO_ESCAPE}=    :    /    .
+${MONITORING_CONFIG_FILEPATH}=       ods_ci/tests/Resources/Files/uwm_cm_conf.yaml
+${UWM_CONFIG_FILEPATH}=              ods_ci/tests/Resources/Files/uwm_cm_enable.yaml
 
 
 *** Keywords ***
@@ -363,7 +365,7 @@ Run And Watch Command
   ${is_test}=    Run keyword And Return Status    Variable Should Exist     ${TEST NAME}
   IF    ${is_test} == ${FALSE}
     ${incremental}=    Generate Random String    5    [NUMBERS]
-    ${TEST NAME}=    Set Variable    testlogs-${incremental}    
+    ${TEST NAME}=    Set Variable    testlogs-${incremental}
   END
   ${process_log} =    Set Variable    ${OUTPUT DIR}/${TEST NAME}.log
   ${temp_log} =    Set Variable    ${TEMPDIR}/${TEST NAME}.log
@@ -412,4 +414,18 @@ Skip If Component Is Not Enabled
     [Documentation]    Skips test if ${component_name} is not enabled in DataScienceCluster
     [Arguments]    ${component_name}
     ${enabled}=    Is Component Enabled    ${component_name}
-    Skip If    "${enabled}" == "false"
+    Skip If    "${enabled}" == "false"
+
+Enable User Workload Monitoring
+    [Documentation]    Enable User Workload Monitoring for the cluster for user-defined-projects by applying uwm_cm_enable.yaml; fails when "oc apply" returns a non-zero code
+    ${return_code}    ${output}    Run And Return Rc And Output   oc apply -f ${UWM_CONFIG_FILEPATH}
+    Log To Console    ${output}
+    Should Be Equal As Integers    ${return_code}     0   msg=Error while applying the provided file
+
+Configure User Workload Monitoring
+    [Documentation]    Configure the retention period in User Workload Monitoring for the cluster by applying uwm_cm_conf.yaml.
+    ...                This period can be configured for the component as and when needed; fails when "oc apply" returns a non-zero code.
+    ${return_code}    ${output}    Run And Return Rc And Output   oc apply -f ${MONITORING_CONFIG_FILEPATH}
+    Log To Console    ${output}
+    Should Be Equal As Integers    ${return_code}     0   msg=Error while applying the provided file
+
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_1.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_1.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_10.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_10.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_11.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_11.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_12.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_12.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_13.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_13.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_2.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_2.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_3.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_3.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_4.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_4.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_5.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_5.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_6.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_6.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_7.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_7.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_8.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_8.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_9.json b/ods_ci/tests/Resources/Files/TrustyAI/loan_default_batched/batch_9.json
diff --git a/ods_ci/tests/Resources/Files/TrustyAI/trustyai_cr.yaml b/ods_ci/tests/Resources/Files/TrustyAI/trustyai_cr.yaml
@@ -0,0 +1,14 @@
+apiVersion: trustyai.opendatahub.io/v1alpha1
+kind: TrustyAIService  # custom resource from the trustyai.opendatahub.io API group
+metadata:
+  name: trustyai-service  # no namespace is set here; presumably supplied by the caller at apply time - TODO confirm
+spec:
+  storage:
+    format: "PVC"
+    folder: "/inputs"
+    size: "1Gi"
+  data:
+    filename: "data.csv"
+    format: "CSV"
+  metrics:
+    schedule: "5s"  # NOTE(review): presumably the metrics calculation interval - confirm against the TrustyAI operator docs
diff --git a/ods_ci/tests/Resources/Files/llm/uwm_cm_conf.yaml b/ods_ci/tests/Resources/Files/llm/uwm_cm_conf.yaml
diff --git a/ods_ci/tests/Resources/Files/openvino-example-input.json b/ods_ci/tests/Resources/Files/openvino-example-input.json
@@ -1 +1 @@
-{"inputs": [{"name": "Func/StatefulPartitionedCall/input/_0:0", "shape": [1,30], "datatype": "FP32", "data": [[-0.81527562, -0.62780094,  1.18457726, -0.56138278,  1.97545981, -1.38669424, -0.03372776, -1.08378356, -0.46514641, -1.07813139, -2.98031409,  2.29087639, -2.82230106, 0.76695155, -5.65368683, 0.04526619, -4.77118557, -5.04520325, -3.02616084,  1.14274513, 0.35082495,  1.64467922,  0.38254332,  0.03085198, 0.83964697, -0.38594229, -0.51760032,  1.39294962,  0.22815041, 0.3301235]]}]}
+{"inputs": [{"name": "Func/StatefulPartitionedCall/input/_0:0", "shape": [1,30], "datatype": "FP32", "data": [[-0.81527562, -0.62780094,  1.18457726, -0.56138278,  1.97545981, -1.38669424, -0.03372776, -1.08378356, -0.46514641, -1.07813139, -2.98031409,  2.29087639, -2.82230106, 0.76695155, -5.65368683, 0.04526619, -4.77118557, -5.04520325, -3.02616084,  1.14274513, 0.35082495,  1.64467922,  0.38254332,  0.03085198, 0.83964697, -0.38594229, -0.51760032,  1.39294962,  0.22815041, 0.3301235]]}]}
diff --git a/ods_ci/tests/Resources/Files/uwm_cm_conf.yaml b/ods_ci/tests/Resources/Files/uwm_cm_conf.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: ConfigMap  # referenced from Common.robot through MONITORING_CONFIG_FILEPATH
+metadata:
+  name: user-workload-monitoring-config
+  namespace: openshift-user-workload-monitoring
+data:  # config.yaml below is a literal block scalar, so its "#Change as needed" text is part of the string value
+  config.yaml: |
+    prometheus:
+      logLevel: debug
+      retention: 15d #Change as needed
+      volumeClaimTemplate:
+        spec:
+            resources:
+              requests:
+                storage: 40Gi
diff --git a/...ts/Resources/Files/llm/uwm_cm_enable.yaml → .../tests/Resources/Files/uwm_cm_enable.yaml b/...ts/Resources/Files/llm/uwm_cm_enable.yaml → .../tests/Resources/Files/uwm_cm_enable.yaml
diff --git a/ods_ci/tests/Resources/Page/LoginPage.robot b/ods_ci/tests/Resources/Page/LoginPage.robot
@@ -42,7 +42,7 @@ Login To Openshift
     # If here we need to login
     Wait Until Element is Visible  xpath://div[@class="pf-c-login"]  timeout=10s
     ${select_auth_type} =  Does Login Require Authentication Type
-    IF  ${select_auth_type}  Select Login Authentication Type  ${ocp_user_auth_type}
+    IF  ${select_auth_type}  Select Login Authentication Type   ${ocp_user_auth_type}
     Wait Until Page Contains  Log in to your account
     Input Text  id=inputUsername  ${ocp_user_name}
     Input Text  id=inputPassword  ${ocp_user_pw}

diff --git a/ods_ci/tests/Resources/Page/OCPDashboard/Monitoring/Metrics.robot b/ods_ci/tests/Resources/Page/OCPDashboard/Monitoring/Metrics.robot
@@ -3,8 +3,9 @@ Library  SeleniumLibrary
 
 
 *** Variables ***
-${METRICS_QUERY_TEXTAREA}                           xpath=//*[@aria-label='Expression (press Shift+Enter for newlines)']
-${METRICS_QUERY_RESULTS_TABLE_ROW1_VALUE_ELEMENT}   xpath=//td[@data-label='Value']
+${METRICS_QUERY_TEXTAREA}                               xpath=//*[@aria-label='Expression (press Shift+Enter for newlines)']
+${METRICS_QUERY_RESULTS_TABLE_ROW1_VALUE_ELEMENT}       xpath=//td[@data-label='Value']
+${METRICS_QUERY_RESULTS_TABLE_ROW1_VALUE_ELEMENT_XP}    //td[@data-label='Value']
 
 
 *** Keywords ***
@@ -14,7 +15,7 @@ Verify Page Loaded
 
 Verify Query Results Contain Data
   Wait Until Page Contains Element  ${METRICS_QUERY_RESULTS_TABLE_ROW1_VALUE_ELEMENT}  timeout=20  error="Query results don't contain data"
-  ${metrics_query_result_row1_value} =   Get Text  ${METRICS_QUERY_RESULTS_TABLE_ROW1_VALUE_ELEMENT}
+  ${metrics_query_result_row1_value} =   Get Text   xpath:${METRICS_QUERY_RESULTS_TABLE_ROW1_VALUE_ELEMENT_XP}
   Should Be True    '${metrics_query_result_row1_value}' != ''   "Query results don't contain data"
   Should Be True    '${metrics_query_result_row1_value}' != 'None'   "Query results don't contain data"
 
@@ -54,3 +55,32 @@ Get Query Results
         END
     END
     RETURN  ${metrics_query_result_row1_value}
+
+Run OpenShift Metrics Query
+    [Documentation]    Runs a query in the Monitoring section of OpenShift and returns its result.
+    ...    Credentials default to OCP_ADMIN_USER so callers passing only the query keep working.
+    ...    Note: the user needs to belong to a group with "view" role in OpenShift.
+    ...    Example command to assign the role: oc adm policy add-cluster-role-to-group view rhods-admins
+    [Arguments]    ${query}   ${username}=${OCP_ADMIN_USER.USERNAME}  ${password}=${OCP_ADMIN_USER.PASSWORD}  ${auth_type}=${OCP_ADMIN_USER.AUTH_TYPE}   ${retry_attempts}=10    ${return_zero_if_result_empty}=False
+    Open OCP Console
+    LoginPage.Login To Openshift    ocp_user_name=${username}   ocp_user_pw=${password}   ocp_user_auth_type=${auth_type}
+    OCPMenu.Switch To Administrator Perspective
+
+    # In OCP 4.9 metrics are under the Observe menu (it was called Monitoring in 4.8)
+    ${menu_observe_exists} =    Run Keyword and Return Status    Menu.Page Should Contain Menu    Observe
+    IF    ${menu_observe_exists}
+        Menu.Navigate To Page    Observe    Metrics
+    ELSE
+        ${menu_monitoring_exists} =    Run Keyword and Return Status    Menu.Page Should Contain Menu    Monitoring
+        IF    ${menu_monitoring_exists}
+            Menu.Navigate To Page    Monitoring    Metrics
+        ELSE
+            Fail  msg=${username} can't see the Observe/Monitoring section in OpenShift Console, please make sure it belongs to a group with "view" role
+        END
+    END
+
+    Metrics.Verify Page Loaded
+    Metrics.Run Query    ${query}    ${retry_attempts}
+    ${result} =    Metrics.Get Query Results    return_zero_if_result_empty=${return_zero_if_result_empty}
+    RETURN    ${result}
+
diff --git a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHModelServing.resource b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHModelServing.resource
@@ -109,7 +109,7 @@ Select Model Server
     ...    Page Should Contain Element     xpath://span[.="Model servers"]/../../..//button[@aria-label="Options menu"]
     IF    ${selectable}==True
         Open Model servers Options Menu  # robocop:disable
-        Click Element    xpath=//li/button[.="${model_server}"]
+        Click Element    xpath://li/button[.="${model_server}"]
     END
 
 Select Framework
@@ -205,6 +205,13 @@ Get Model Route via UI
     ${url}=    SeleniumLibrary.Get Element Attribute    ${route_xpath}    value
     RETURN    ${url}
 
+Get Model Route Via CLI
+    [Documentation]    Builds the inference URL (https://<host><path>/infer) of an already deployed model from its route, queried with "oc get route".
+    [Arguments]    ${model_name}    ${project_name}
+    ${return_code}    ${route}=    Run And Return Rc And Output    oc get route ${model_name} -n ${project_name} --template={{.spec.host}}{{.spec.path}}
+    Should Be Equal As Integers    ${return_code}    0    msg=Error getting model route
+    RETURN    https://${route}/infer
+
 Open ${section} Options Menu
     [Documentation]    Opens the "Options menu" dropdown for different sections
     ...    Valid sections are: "Name", "Model framework (name - version)", "Project"
@@ -241,7 +248,7 @@ Get Model Inference
     ...    ${project_title}=${NONE}
     ${self_managed} =    Is RHODS Self-Managed
     ${url}=    Get Model Route via UI    ${model_name}
-    ${curl_cmd}=     Set Variable    curl -s ${url} -d ${inference_input}
+    ${curl_cmd}=     Set Variable    curl -s ${url} -d @${inference_input}
     IF    ${token_auth}
         IF    "${project_title}" == "${NONE}"
             ${project_title}=    Get Model Project    ${model_name}
@@ -488,4 +495,4 @@ Set Up Project
         Oc Apply    kind=ConfigMap    src=${UWM_CONFIG_FILEPATH}
     ELSE
         Log    message=Skipping UserWorkloadMonitoring enablement.
-    END
+    END
diff --git a/ods_ci/tests/Tests/100__deploy/100__installation/111__trustyai.robot b/ods_ci/tests/Tests/100__deploy/100__installation/111__trustyai.robot
@@ -0,0 +1,39 @@
+*** Settings ***
+Documentation       Post install test cases that verify TrustyAI operator resources and objects
+Library             OpenShiftLibrary
+
+
+*** Variables ***
+${TRUSTYAI_NS}=    ${APPLICATIONS_NAMESPACE}
+
+
+*** Test Cases ***
+Verify TrustyAI Operator Installation
+    [Documentation]    Verifies that the TrustyAI operator has been
+    ...    deployed in the ${APPLICATIONS_NAMESPACE} namespace in ODS
+    [Tags]    Smoke
+    ...       Tier1    ODS-2481
+    Run Keyword And Continue On Failure  Wait Until Keyword Succeeds  1 min  10 sec  Verify trustyai-service-operator-controller-manager Deployment
+    Wait Until Keyword Succeeds    10 times  5s    Verify TrustyAI ReplicaSets Info
+    Wait Until Keyword Succeeds    10 times  5s    Verify TrustyAI Container Names
+
+
+*** Keywords ***
+Verify trustyai-service-operator-controller-manager Deployment
+    [Documentation]    Waits for the pods labelled as created by trustyai-service-operator to be ready in the applications namespace (exp_replicas=1)
+    Wait For Pods To Be Ready   label_selector=app.kubernetes.io/created-by=trustyai-service-operator
+    ...    namespace=${APPLICATIONS_NAMESPACE}    exp_replicas=1
+
+Verify TrustyAI ReplicaSets Info
+    [Documentation]    Gets the ReplicaSets labelled part-of=trustyai-service-operator and checks status.readyReplicas equals 1
+    @{replicasets} =    Oc Get    kind=ReplicaSet    api_version=v1    namespace=${TRUSTYAI_NS}
+    ...    label_selector=app.kubernetes.io/part-of=trustyai-service-operator
+    OpenShift Resource Field Value Should Be Equal As Strings    status.readyReplicas    1
+    ...    @{replicasets}
+
+Verify TrustyAI Container Names
+    [Documentation]  Verifies the container names of the TrustyAI operator pods, selected by the same part-of label as the ReplicaSets check instead of the KServe one
+    @{trustyai} =  Oc Get    kind=Pod    namespace=${TRUSTYAI_NS}    api_version=v1
+    ...    label_selector=app.kubernetes.io/part-of=trustyai-service-operator
+    ${containerNames} =    Create List    kube-rbac-proxy    manager
+    Verify Deployment    ${trustyai}    1    1    ${containerNames}
diff --git a/ods_ci/tests/Tests/200__monitor_and_manage/200__metrics/201__billing_metrics.robot b/ods_ci/tests/Tests/200__monitor_and_manage/200__metrics/201__billing_metrics.robot
@@ -4,6 +4,7 @@ Resource            ../../../Resources/RHOSi.resource
 Resource            ../../../Resources/ODS.robot
 Resource            ../../../Resources/Common.robot
 Resource            ../../../Resources/Page/OCPDashboard/OCPDashboard.resource
+Resource            ../../../Resources/Page/OCPDashboard/Monitoring/Metrics.robot
 Resource            ../../../Resources/Page/ODH/JupyterHub/JupyterLabLauncher.robot
 Resource            ../../../Resources/Page/ODH/JupyterHub/JupyterHubSpawner.robot
 Resource            ../../../Resources/Page/OCPLogin/OCPLogin.resource
@@ -165,35 +166,6 @@ Skip Test If Previous CPU Usage Is Not Zero
         ...    The previos CPU usage is not zero. Current CPU usage: ${metrics_value}. Skiping test
     END
 
-Run OpenShift Metrics Query
-    [Documentation]    Runs a query in the Monitoring section of Open Shift
-    ...    Note: in order to run this keyword OCP_ADMIN_USER.USERNAME needs to
-    ...    belong to a group with "view" role in OpenShift
-    ...    Example command to assign the role: oc adm policy add-cluster-role-to-group view rhods-admins
-    [Arguments]    ${query}    ${retry_attempts}=10    ${return_zero_if_result_empty}=False
-    Open OCP Console
-    LoginPage.Login To Openshift    ${OCP_ADMIN_USER.USERNAME}    ${OCP_ADMIN_USER.PASSWORD}    ${OCP_ADMIN_USER.AUTH_TYPE}
-    OCPMenu.Switch To Administrator Perspective
-
-    # In OCP 4.9 metrics are under the Observe menu (it was called Monitoring in 4.8)
-    ${menu_observe_exists} =    Run Keyword and Return Status    Menu.Page Should Contain Menu    Observe
-    IF    ${menu_observe_exists}
-        Menu.Navigate To Page    Observe    Metrics
-    ELSE
-        ${menu_monitoring_exists} =    Run Keyword and Return Status    Menu.Page Should Contain Menu    Monitoring
-        IF    ${menu_monitoring_exists}
-            Menu.Navigate To Page    Monitoring    Metrics
-        ELSE
-            Fail
-            ...    msg=${OCP_ADMIN_USER.USERNAME} can't see the Observe/Monitoring section in OpenShift Console, please make sure it belongs to a group with "view" role
-        END
-    END
-
-    Metrics.Verify Page Loaded
-    Metrics.Run Query    ${query}    ${retry_attempts}
-    ${result} =    Metrics.Get Query Results    return_zero_if_result_empty=${return_zero_if_result_empty}
-    RETURN    ${result}
-
 Verify Previus CPU Usage Is Greater Than Zero
     [Documentation]     Verifies the cpu usage is greater than zero
     ${metrics_value} =    Run OpenShift Metrics Query    ${METRIC_RHODS_CPU}
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"inputs": [{"name": "Func/StatefulPartitionedCall/input/_0:0", "shape": [1,30], "datatype": "FP32", "data": [[-0.81527562, -0.62780094, 1.18457726, -0.56138278, 1.97545981, -1.38669424, -0.03372776, -1.08378356, -0.46514641, -1.07813139, -2.98031409, 2.29087639, -2.82230106, 0.76695155, -5.65368683, 0.04526619, -4.77118557, -5.04520325, -3.02616084, 1.14274513, 0.35082495, 1.64467922, 0.38254332, 0.03085198, 0.83964697, -0.38594229, -0.51760032, 1.39294962, 0.22815041, 0.3301235]]}]}
		{"inputs": [{"name": "Func/StatefulPartitionedCall/input/_0:0", "shape": [1,30], "datatype": "FP32", "data": [[-0.81527562, -0.62780094, 1.18457726, -0.56138278, 1.97545981, -1.38669424, -0.03372776, -1.08378356, -0.46514641, -1.07813139, -2.98031409, 2.29087639, -2.82230106, 0.76695155, -5.65368683, 0.04526619, -4.77118557, -5.04520325, -3.02616084, 1.14274513, 0.35082495, 1.64467922, 0.38254332, 0.03085198, 0.83964697, -0.38594229, -0.51760032, 1.39294962, 0.22815041, 0.3301235]]}]}