From 4e61a5132ec4b0b2a2e036ffb6d1462c1c493b3e Mon Sep 17 00:00:00 2001 From: bdattoma Date: Mon, 19 Feb 2024 17:06:42 +0100 Subject: [PATCH 01/16] add TGIS suite with initial adjustments --- .../LLMs/422__model_serving_llm _tgis.robot | 542 ++++++++++++++++++ 1 file changed, 542 insertions(+) create mode 100644 ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot new file mode 100644 index 000000000..c94622146 --- /dev/null +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot @@ -0,0 +1,542 @@ +*** Settings *** +Documentation Collection of CLI tests to validate the model serving stack for Large Language Models (LLM). +... These tests leverage on TGIS Standalone Serving Runtime +Resource ../../../../Resources/OCP.resource +Resource ../../../../Resources/CLI/ModelServing/llm.resource +Library OpenShiftLibrary +Suite Setup Suite Setup +Suite Teardown RHOSi Teardown +Test Tags KServe + + +*** Variables *** +${FLAN_MODEL_S3_DIR}= flan-t5-small/flan-t5-small-hf +${FLAN_GRAMMAR_MODEL_S3_DIR}= flan-t5-large-grammar-synthesis-caikit/flan-t5-large-grammar-synthesis-caikit +${FLAN_LARGE_MODEL_S3_DIR}= flan-t5-large/flan-t5-large +${BLOOM_MODEL_S3_DIR}= bloom-560m/bloom-560m-caikit +${FLAN_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_MODEL_S3_DIR} +${FLAN_GRAMMAR_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_GRAMMAR_MODEL_S3_DIR}/artifacts +${FLAN_LARGE_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_LARGE_MODEL_S3_DIR}/artifacts +${BLOOM_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${BLOOM_MODEL_S3_DIR}/artifacts +${TEST_NS}= tgis-standalone2 +${TGIS_RUNTIME_NAME}= tgis-runtime + + +*** Test Cases *** +Verify User Can Serve And Query A Model + [Documentation] Basic tests for preparing, 
deploying and querying a LLM model + ... using Kserve and Caikit+TGIS runtime + [Tags] Sanity Tier1 ODS-2341 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-cli + ${test_namespace}= Set Variable ${TEST_NS}-cli + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=streaming n_times=1 + ... namespace=${test_namespace} validate_response=${FALSE} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Deploy Multiple Models In The Same Namespace + [Documentation] Checks if user can deploy and query multiple models in the same namespace + [Tags] Sanity Tier1 ODS-2371 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-multisame + ${test_namespace}= Set Variable ${TEST_NS}-multisame + ${model_one_name}= Set Variable bloom-560m-caikit + ${model_two_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${model_one_name} ${model_two_name} + Compile Inference Service YAML isvc_name=${model_one_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${BLOOM_STORAGE_URI} + ... 
model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Compile Inference Service YAML isvc_name=${model_two_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_one_name} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_two_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${model_one_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=5 namespace=${test_namespace} + Query Model Multiple Times model_name=${model_two_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=10 namespace=${test_namespace} + Query Model Multiple Times model_name=${model_one_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=5 namespace=${test_namespace} + Query Model Multiple Times model_name=${model_two_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=10 namespace=${test_namespace} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Deploy Multiple Models In Different Namespaces + [Documentation] Checks if user can deploy and query multiple models in the different namespaces + [Tags] Sanity Tier1 ODS-2378 + [Setup] Run Keywords Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-multi1 + ... AND + ... 
Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-multi2 + ${model_one_name}= Set Variable bloom-560m-caikit + ${model_two_name}= Set Variable flan-t5-small-caikit + ${models_names_ns_1}= Create List ${model_one_name} + ${models_names_ns_2}= Create List ${model_two_name} + Compile Inference Service YAML isvc_name=${model_one_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${BLOOM_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=singlemodel-multi1 + Compile Inference Service YAML isvc_name=${model_two_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=singlemodel-multi2 + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_one_name} + ... namespace=singlemodel-multi1 + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_two_name} + ... namespace=singlemodel-multi2 + Query Model Multiple Times model_name=${model_one_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=2 namespace=singlemodel-multi1 + Query Model Multiple Times model_name=${model_two_name} runtime=${TGIS_RUNTIME_NAME} + ... n_times=2 namespace=singlemodel-multi2 + [Teardown] Run Keywords Clean Up Test Project test_ns=singlemodel-multi1 isvc_names=${models_names_ns_1} + ... wait_prj_deletion=${FALSE} + ... AND + ... Clean Up Test Project test_ns=singlemodel-multi2 isvc_names=${models_names_ns_2} + ... wait_prj_deletion=${FALSE} + +Verify Model Upgrade Using Canaray Rollout + [Documentation] Checks if user can apply Canary Rollout as deployment strategy + [Tags] Sanity Tier1 ODS-2372 + ... 
AutomationBug + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=canary-model-upgrade + ${test_namespace}= Set Variable canary-model-upgrade + ${isvc_name}= Set Variable canary-caikit + ${model_name}= Set Variable flan-t5-small-caikit + ${isvcs_names}= Create List ${isvc_name} + ${canary_percentage}= Set Variable ${30} + Compile Deploy And Query LLM model isvc_name=${isvc_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_name=${model_name} + ... namespace=${test_namespace} + ... validate_response=${FALSE} + Log To Console Applying Canary Traffic for Model Upgrade + ${model_name}= Set Variable bloom-560m-caikit + Compile Deploy And Query LLM model isvc_name=${isvc_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${BLOOM_STORAGE_URI} + ... model_name=${model_name} + ... canaryTrafficPercent=${canary_percentage} + ... namespace=${test_namespace} + ... validate_response=${FALSE} + ... n_queries=${0} + Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} + ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + Log To Console Remove Canary Traffic For Model Upgrade + Compile Deploy And Query LLM model isvc_name=${isvc_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_name=${model_name} + ... model_storage_uri=${BLOOM_STORAGE_URI} + ... namespace=${test_namespace} + Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} + ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE} + +Verify Model Pods Are Deleted When No Inference Service Is Present + [Documentation] Checks if model pods get successfully deleted after + ... 
deleting the KServe InferenceService object + [Tags] Tier2 ODS-2373 AutomationBug + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=no-infer-kserve + ${flan_isvc_name}= Set Variable flan-t5-small-caikit + ${model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${model_name} + Compile Deploy And Query LLM model isvc_name=${flan_isvc_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_name=${model_name} + ... namespace=no-infer-kserve + Delete InfereceService isvc_name=${flan_isvc_name} namespace=no-infer-kserve + ${rc} ${out}= Run And Return Rc And Output oc wait pod -l serving.kserve.io/inferenceservice=${flan_isvc_name} -n no-infer-kserve --for=delete --timeout=200s + Should Be Equal As Integers ${rc} ${0} + [Teardown] Clean Up Test Project test_ns=no-infer-kserve + ... isvc_names=${models_names} isvc_delete=${FALSE} + ... wait_prj_deletion=${FALSE} + +Verify User Can Change The Minimum Number Of Replicas For A Model + [Documentation] Checks if user can change the minimum number of replicas + ... of a deployed model. + ... Affected by: https://issues.redhat.com/browse/SRVKS-1175 + [Tags] Sanity Tier1 ODS-2376 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-reps + ${test_namespace}= Set Variable ${TEST_NS}-reps + ${model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${model_name} + Compile Inference Service YAML isvc_name=${model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... min_replicas=1 + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} + ... 
namespace=${test_namespace} exp_replicas=1 + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} n_times=3 + ... namespace=${test_namespace} + ${rev_id}= Set Minimum Replicas Number n_replicas=3 model_name=${model_name} + ... namespace=${test_namespace} + Wait For Pods To Be Terminated label_selector=serving.knative.dev/revisionUID=${rev_id} + ... namespace=${test_namespace} timeout=360s + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} exp_replicas=3 + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} n_times=3 + ... namespace=${test_namespace} + ${rev_id}= Set Minimum Replicas Number n_replicas=1 model_name=${model_name} + ... namespace=${test_namespace} + Wait For Pods To Be Terminated label_selector=serving.knative.dev/revisionUID=${rev_id} + ... namespace=${test_namespace} timeout=360s + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} exp_replicas=1 + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} n_times=3 + ... namespace=${test_namespace} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Autoscale Using Concurrency + [Documentation] Checks if model successfully scale up based on concurrency metrics (KPA) + [Tags] Sanity Tier1 ODS-2377 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=autoscale-con + ${test_namespace}= Set Variable autoscale-con + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... auto_scale=True + ... 
model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} n_times=10 + ... namespace=${test_namespace} validate_response=${FALSE} background=${TRUE} + Wait For Pods Number number=1 comparison=GREATER THAN + ... label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Validate Scale To Zero + [Documentation] Checks if model successfully scale down to 0 if there's no traffic + [Tags] Sanity Tier1 ODS-2379 AutomationBug + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=autoscale-zero + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=autoscale-zero + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=autoscale-zero + ${host}= Get KServe Inference Host Via CLI isvc_name=${flan_model_name} namespace=autoscale-zero + ${body}= Set Variable '{"text": "At what temperature does liquid Nitrogen boil?"}' + ${header}= Set Variable 'mm-model-id: ${flan_model_name}' + Query Model With GRPCURL host=${host} port=443 + ... 
endpoint="caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict" + ... json_body=${body} json_header=${header} + ... insecure=${TRUE} + Set Minimum Replicas Number n_replicas=0 model_name=${flan_model_name} + ... namespace=autoscale-zero + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=autoscale-zero + Wait For Pods To Be Terminated label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=autoscale-zero + Query Model With GRPCURL host=${host} port=443 + ... endpoint="caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict" + ... json_body=${body} json_header=${header} + ... insecure=${TRUE} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=autoscale-zero + Wait For Pods To Be Terminated label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=autoscale-zero + [Teardown] Clean Up Test Project test_ns=autoscale-zero + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Set Requests And Limits For A Model + [Documentation] Checks if user can set HW request and limits on their inference service object + [Tags] Sanity Tier1 ODS-2380 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=hw-res + ${test_namespace}= Set Variable hw-res + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + ${requests}= Create Dictionary cpu=1 memory=2Gi + ${limits}= Create Dictionary cpu=2 memory=4Gi + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... requests_dict=${requests} limits_dict=${limits} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... 
namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + ${rev_id}= Get Current Revision ID model_name=${flan_model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} n_times=1 + ... namespace=${test_namespace} + Container Hardware Resources Should Match Expected container_name=kserve-container + ... pod_label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} exp_requests=${requests} exp_limits=${limits} + ${new_requests}= Create Dictionary cpu=2 memory=3Gi + Set Model Hardware Resources model_name=${flan_model_name} namespace=hw-res + ... requests=${new_requests} limits=${NONE} + Wait For Pods To Be Terminated label_selector=serving.knative.dev/revisionUID=${rev_id} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} exp_replicas=1 + Container Hardware Resources Should Match Expected container_name=kserve-container + ... pod_label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} exp_requests=${new_requests} exp_limits=${NONE} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify Model Can Be Served And Query On A GPU Node + [Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node + ... 
using Kserve and Caikit+TGIS runtime + [Tags] Sanity Tier1 ODS-2381 Resources-GPU + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-gpu + ${test_namespace}= Set Variable singlemodel-gpu + ${model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${model_name} + ${requests}= Create Dictionary nvidia.com/gpu=1 + ${limits}= Create Dictionary nvidia.com/gpu=1 + Compile Inference Service YAML isvc_name=${model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... requests_dict=${requests} limits_dict=${limits} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} + Container Hardware Resources Should Match Expected container_name=kserve-container + ... pod_label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} exp_requests=${requests} exp_limits=${limits} + Model Pod Should Be Scheduled On A GPU Node label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} n_times=10 + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} n_times=5 + ... namespace=${test_namespace} inference_type=streaming validate_response=${FALSE} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${model_name} wait_prj_deletion=${FALSE} + +Verify Non Admin Can Serve And Query A Model + [Documentation] Basic tests leveraging on a non-admin user for preparing, deploying and querying a LLM model + ... 
using Kserve and Caikit+TGIS runtime + [Tags] Sanity Tier1 ODS-2326 + [Setup] Run Keywords Login To OCP Using API ${TEST_USER_3.USERNAME} ${TEST_USER_3.PASSWORD} AND + ... Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=non-admin-test + ${test_namespace}= Set Variable non-admin-test + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + ${host}= Get KServe Inference Host Via CLI isvc_name=${flan_model_name} namespace=${test_namespace} + ${body}= Set Variable '{"text": "${EXP_RESPONSES}[queries][0][query_text]"}' + ${header}= Set Variable 'mm-model-id: ${flan_model_name}' + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=streaming n_times=1 validate_response=${FALSE} + ... namespace=${test_namespace} + [Teardown] Run Keywords Login To OCP Using API ${OCP_ADMIN_USER.USERNAME} ${OCP_ADMIN_USER.PASSWORD} AND + ... Clean Up Test Project test_ns=${test_namespace} isvc_names=${models_names} + ... 
wait_prj_deletion=${FALSE} + +Verify User Can Serve And Query Flan-t5 Grammar Syntax Corrector + [Documentation] Deploys and queries flan-t5-large-grammar-synthesis model + [Tags] Tier2 ODS-2441 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=grammar-model + ${test_namespace}= Set Variable grammar-model + ${flan_model_name}= Set Variable flan-t5-large-grammar-synthesis-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_GRAMMAR_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} query_idx=1 + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=streaming n_times=1 validate_response=${FALSE} + ... namespace=${test_namespace} query_idx=${1} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Serve And Query Flan-t5 Large + [Documentation] Deploys and queries flan-t5-large model + [Tags] Tier2 ODS-2434 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=flan-t5-large3 + ${test_namespace}= Set Variable flan-t5-large3 + ${flan_model_name}= Set Variable flan-t5-large + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_LARGE_STORAGE_URI} + ... 
model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} query_idx=${0} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=streaming n_times=1 validate_response=${FALSE} + ... namespace=${test_namespace} query_idx=${0} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify Runtime Upgrade Does Not Affect Deployed Models + [Documentation] Upgrades the caikit runtime in the same NS where a model + ... is already deployed. The expectation is that the current model + ... must remain unchanged after the runtime upgrade. + ... ATTENTION: this is an approximation of the runtime upgrade scenario, however + ... the real case scenario will be defined once RHODS actually ships the Caikit runtime. + [Tags] Sanity Tier1 ODS-2404 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-up + ${test_namespace}= Set Variable ${TEST_NS}-up + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... 
namespace=${test_namespace} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} + ${created_at} ${caikitsha}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} + ... namespace=${test_namespace} + Upgrade Caikit Runtime Image new_image_url=quay.io/opendatahub/caikit-tgis-serving:stable + ... namespace=${test_namespace} + Sleep 5s reason=Sleep, in case the runtime upgrade takes some time to start performing actions on the pods... + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} exp_replicas=1 + ${created_at_after} ${caikitsha_after}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} + ... namespace=${test_namespace} + Should Be Equal ${created_at} ${created_at_after} + Should Be Equal As Strings ${caikitsha} ${caikitsha_after} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Access Model Metrics From UWM + [Documentation] Verifies that model metrics are available for users in the + ... OpenShift monitoring system (UserWorkloadMonitoring) + ... PARTIALLY DONE: it is checking number of requests, number of successful requests + ... and model pod cpu usage. Waiting for a complete list of expected metrics and + ... derived metrics. + [Tags] Sanity Tier1 ODS-2401 + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-metrics enable_metrics=${TRUE} + ${test_namespace}= Set Variable singlemodel-metrics + ${flan_model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${flan_model_name} + ${thanos_url}= Get OpenShift Thanos URL + ${token}= Generate Thanos Token + Compile Inference Service YAML isvc_name=${flan_model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... 
model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} + ... namespace=${test_namespace} + Wait Until Keyword Succeeds 30 times 4s + ... TGI Caikit And Istio Metrics Should Exist thanos_url=${thanos_url} thanos_token=${token} + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=all-tokens n_times=3 + ... namespace=${test_namespace} + Wait Until Keyword Succeeds 50 times 5s + ... User Can Fetch Number Of Requests Over Defined Time thanos_url=${thanos_url} thanos_token=${token} + ... model_name=${flan_model_name} query_kind=single namespace=${test_namespace} period=5m exp_value=3 + Wait Until Keyword Succeeds 20 times 5s + ... User Can Fetch Number Of Successful Requests Over Defined Time thanos_url=${thanos_url} thanos_token=${token} + ... model_name=${flan_model_name} namespace=${test_namespace} period=5m exp_value=3 + Wait Until Keyword Succeeds 20 times 5s + ... User Can Fetch CPU Utilization thanos_url=${thanos_url} thanos_token=${token} + ... model_name=${flan_model_name} namespace=${test_namespace} period=5m + Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} + ... inference_type=streaming n_times=1 validate_response=${FALSE} + ... namespace=${test_namespace} query_idx=${0} + Wait Until Keyword Succeeds 30 times 5s + ... User Can Fetch Number Of Requests Over Defined Time thanos_url=${thanos_url} thanos_token=${token} + ... model_name=${flan_model_name} query_kind=stream namespace=${test_namespace} period=5m exp_value=1 + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... 
isvc_names=${models_names} wait_prj_deletion=${FALSE} + +Verify User Can Query A Model Using HTTP Calls + [Documentation] From RHOAI 2.5 HTTP is allowed and default querying protocol. + ... This tests deploys the runtime enabling HTTP port and send queries to the model + [Tags] ODS-2501 Sanity Tier1 ProductBug + [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=kserve-http protocol=http + ${test_namespace}= Set Variable kserve-http + ${model_name}= Set Variable flan-t5-small-caikit + ${models_names}= Create List ${model_name} + Compile Inference Service YAML isvc_name=${model_name} + ... sa_name=${DEFAULT_BUCKET_SA_NAME} + ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} + Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} + ... namespace=${test_namespace} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} + ... namespace=${test_namespace} + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} protocol=http + ... inference_type=all-tokens n_times=1 + ... namespace=${test_namespace} query_idx=${0} + # temporarily disabling stream response validation. Need to re-design the expected response json file + # because format of streamed response with http is slightly different from grpc + Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME} protocol=http + ... inference_type=streaming n_times=1 validate_response=${FALSE} + ... namespace=${test_namespace} query_idx=${0} + [Teardown] Clean Up Test Project test_ns=${test_namespace} + ... 
isvc_names=${models_names} wait_prj_deletion=${FALSE} + + +*** Keywords *** +Suite Setup + [Documentation] + Skip If Component Is Not Enabled kserve + RHOSi Setup + Load Expected Responses \ No newline at end of file From 18ea71f5b6526654eee5d0fab158252ef384d082 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Mon, 19 Feb 2024 17:09:44 +0100 Subject: [PATCH 02/16] remove from sanity --- .../LLMs/422__model_serving_llm _tgis.robot | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot index c94622146..a8e76ad21 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot @@ -26,7 +26,7 @@ ${TGIS_RUNTIME_NAME}= tgis-runtime Verify User Can Serve And Query A Model [Documentation] Basic tests for preparing, deploying and querying a LLM model ... 
using Kserve and Caikit+TGIS runtime - [Tags] Sanity Tier1 ODS-2341 + [Tags] Tier1 ODS-2341 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-cli ${test_namespace}= Set Variable ${TEST_NS}-cli ${flan_model_name}= Set Variable flan-t5-small-caikit @@ -50,7 +50,7 @@ Verify User Can Serve And Query A Model Verify User Can Deploy Multiple Models In The Same Namespace [Documentation] Checks if user can deploy and query multiple models in the same namespace - [Tags] Sanity Tier1 ODS-2371 + [Tags] Tier1 ODS-2371 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-multisame ${test_namespace}= Set Variable ${TEST_NS}-multisame ${model_one_name}= Set Variable bloom-560m-caikit @@ -84,7 +84,7 @@ Verify User Can Deploy Multiple Models In The Same Namespace Verify User Can Deploy Multiple Models In Different Namespaces [Documentation] Checks if user can deploy and query multiple models in the different namespaces - [Tags] Sanity Tier1 ODS-2378 + [Tags] Tier1 ODS-2378 [Setup] Run Keywords Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-multi1 ... AND ... Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-multi2 @@ -119,7 +119,7 @@ Verify User Can Deploy Multiple Models In Different Namespaces Verify Model Upgrade Using Canaray Rollout [Documentation] Checks if user can apply Canary Rollout as deployment strategy - [Tags] Sanity Tier1 ODS-2372 + [Tags] Tier1 ODS-2372 ... AutomationBug [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=canary-model-upgrade ${test_namespace}= Set Variable canary-model-upgrade @@ -180,7 +180,7 @@ Verify User Can Change The Minimum Number Of Replicas For A Model [Documentation] Checks if user can change the minimum number of replicas ... of a deployed model. ... 
Affected by: https://issues.redhat.com/browse/SRVKS-1175 - [Tags] Sanity Tier1 ODS-2376 + [Tags] Tier1 ODS-2376 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-reps ${test_namespace}= Set Variable ${TEST_NS}-reps ${model_name}= Set Variable flan-t5-small-caikit @@ -217,7 +217,7 @@ Verify User Can Change The Minimum Number Of Replicas For A Model Verify User Can Autoscale Using Concurrency [Documentation] Checks if model successfully scale up based on concurrency metrics (KPA) - [Tags] Sanity Tier1 ODS-2377 + [Tags] Tier1 ODS-2377 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=autoscale-con ${test_namespace}= Set Variable autoscale-con ${flan_model_name}= Set Variable flan-t5-small-caikit @@ -243,7 +243,7 @@ Verify User Can Autoscale Using Concurrency Verify User Can Validate Scale To Zero [Documentation] Checks if model successfully scale down to 0 if there's no traffic - [Tags] Sanity Tier1 ODS-2379 AutomationBug + [Tags] Tier1 ODS-2379 AutomationBug [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=autoscale-zero ${flan_model_name}= Set Variable flan-t5-small-caikit ${models_names}= Create List ${flan_model_name} @@ -281,7 +281,7 @@ Verify User Can Validate Scale To Zero Verify User Can Set Requests And Limits For A Model [Documentation] Checks if user can set HW request and limits on their inference service object - [Tags] Sanity Tier1 ODS-2380 + [Tags] Tier1 ODS-2380 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=hw-res ${test_namespace}= Set Variable hw-res ${flan_model_name}= Set Variable flan-t5-small-caikit @@ -320,7 +320,7 @@ Verify User Can Set Requests And Limits For A Model Verify Model Can Be Served And Query On A GPU Node [Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node ... 
using Kserve and Caikit+TGIS runtime - [Tags] Sanity Tier1 ODS-2381 Resources-GPU + [Tags] Tier1 ODS-2381 Resources-GPU [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-gpu ${test_namespace}= Set Variable singlemodel-gpu ${model_name}= Set Variable flan-t5-small-caikit @@ -351,7 +351,7 @@ Verify Model Can Be Served And Query On A GPU Node Verify Non Admin Can Serve And Query A Model [Documentation] Basic tests leveraging on a non-admin user for preparing, deploying and querying a LLM model ... using Kserve and Caikit+TGIS runtime - [Tags] Sanity Tier1 ODS-2326 + [Tags] Tier1 ODS-2326 [Setup] Run Keywords Login To OCP Using API ${TEST_USER_3.USERNAME} ${TEST_USER_3.PASSWORD} AND ... Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=non-admin-test ${test_namespace}= Set Variable non-admin-test @@ -432,7 +432,7 @@ Verify Runtime Upgrade Does Not Affect Deployed Models ... must remain unchanged after the runtime upgrade. ... ATTENTION: this is an approximation of the runtime upgrade scenario, however ... the real case scenario will be defined once RHODS actually ships the Caikit runtime. - [Tags] Sanity Tier1 ODS-2404 + [Tags] Tier1 ODS-2404 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=${TEST_NS}-up ${test_namespace}= Set Variable ${TEST_NS}-up ${flan_model_name}= Set Variable flan-t5-small-caikit @@ -468,7 +468,7 @@ Verify User Can Access Model Metrics From UWM ... PARTIALLY DONE: it is checking number of requests, number of successful requests ... and model pod cpu usage. Waiting for a complete list of expected metrics and ... derived metrics. 
- [Tags] Sanity Tier1 ODS-2401 + [Tags] Tier1 ODS-2401 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-metrics enable_metrics=${TRUE} ${test_namespace}= Set Variable singlemodel-metrics ${flan_model_name}= Set Variable flan-t5-small-caikit @@ -509,7 +509,7 @@ Verify User Can Access Model Metrics From UWM Verify User Can Query A Model Using HTTP Calls [Documentation] From RHOAI 2.5 HTTP is allowed and default querying protocol. ... This tests deploys the runtime enabling HTTP port and send queries to the model - [Tags] ODS-2501 Sanity Tier1 ProductBug + [Tags] ODS-2501 Tier1 ProductBug [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=kserve-http protocol=http ${test_namespace}= Set Variable kserve-http ${model_name}= Set Variable flan-t5-small-caikit From 838219e61dc0e6cad4fc6a254df3ea426c8b13cd Mon Sep 17 00:00:00 2001 From: bdattoma Date: Mon, 19 Feb 2024 17:17:20 +0100 Subject: [PATCH 03/16] fix ODS-2378 --- .../420__model_serving/LLMs/422__model_serving_llm _tgis.robot | 1 + 1 file changed, 1 insertion(+) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot index a8e76ad21..a2c92d226 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot @@ -101,6 +101,7 @@ Verify User Can Deploy Multiple Models In Different Namespaces Compile Inference Service YAML isvc_name=${model_two_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} ... 
namespace=singlemodel-multi2 Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_one_name} From 46552b18456df6158817b2b3d6df16bce6f3f2b7 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Mon, 19 Feb 2024 17:32:52 +0100 Subject: [PATCH 04/16] fix ODS-2373 --- .../LLMs/422__model_serving_llm _tgis.robot | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot index a2c92d226..37e0e1114 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot @@ -134,6 +134,7 @@ Verify Model Upgrade Using Canaray Rollout ... model_name=${model_name} ... namespace=${test_namespace} ... validate_response=${FALSE} + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Log To Console Applying Canary Tarffic for Model Upgrade ${model_name}= Set Variable bloom-560m-caikit Compile Deploy And Query LLM model isvc_name=${isvc_name} @@ -144,6 +145,7 @@ Verify Model Upgrade Using Canaray Rollout ... namespace=${test_namespace} ... validate_response=${FALSE} ... n_queries=${0} + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} Log To Console Remove Canary Tarffic For Model Upgrade @@ -160,7 +162,7 @@ Verify Model Upgrade Using Canaray Rollout Verify Model Pods Are Deleted When No Inference Service Is Present [Documentation] Checks if model pods gets successfully deleted after ... 
deleting the KServe InferenceService object - [Tags] Tier2 ODS-2373 AutomationBug + [Tags] Tier2 ODS-2373 [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=no-infer-kserve ${flan_isvc_name}= Set Variable flan-t5-small-caikit ${model_name}= Set Variable flan-t5-small-caikit @@ -170,6 +172,7 @@ Verify Model Pods Are Deleted When No Inference Service Is Present ... model_storage_uri=${FLAN_STORAGE_URI} ... model_name=${model_name} ... namespace=no-infer-kserve + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Delete InfereceService isvc_name=${flan_isvc_name} namespace=no-infer-kserve ${rc} ${out}= Run And Return Rc And Output oc wait pod -l serving.kserve.io/inferenceservice=${flan_isvc_name} -n no-infer-kserve --for=delete --timeout=200s Should Be Equal As Integers ${rc} ${0} From 1b87609c8c845d4e2d22c807ffa9a6516aad9372 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 09:25:11 +0100 Subject: [PATCH 05/16] fix file name --- ..._serving_llm _tgis.robot => 422__model_serving_llm_tgis.robot} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/{422__model_serving_llm _tgis.robot => 422__model_serving_llm_tgis.robot} (100%) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot similarity index 100% rename from ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm _tgis.robot rename to ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot From 7aa0889554f7d1d0e7a72c92fc8f875d569555e0 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 09:55:33 +0100 Subject: [PATCH 06/16] fix compile deploy and query keyword --- ods_ci/tests/Resources/CLI/ModelServing/llm.resource | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff 
--git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource index 875866dea..5f206d161 100644 --- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource +++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource @@ -290,13 +290,14 @@ Compile Deploy And Query LLM model [Documentation] Group together the test steps for preparing, deploying ... and querying a model [Arguments] ${model_storage_uri} ${model_name} ${isvc_name}=${model_name} - ... ${runtime}=caikit-tgis-runtime ${protocol}=grpc ${inference_type}=all-tokens + ... ${runtime}=caikit-tgis-runtime ${model_format}=caikit ${protocol}=grpc ${inference_type}=all-tokens ... ${canaryTrafficPercent}=${EMPTY} ${namespace}=${TEST_NS} ${sa_name}=${DEFAULT_BUCKET_SA_NAME} ... ${n_queries}=${1} ${query_idx}=${0} ${validate_response}=${TRUE} Compile Inference Service YAML isvc_name=${isvc_name} - ... sa_name=${sa_name} + ... sa_name=${sa_name} serving_runtime=${runtime} ... model_storage_uri=${model_storage_uri} ... canaryTrafficPercent=${canaryTrafficPercent} + ... model_format=${model_format} Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} ... 
namespace=${namespace} Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${isvc_name} From 7eff9490eef80f0d5e6b1af52fbe7e721db38f87 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 09:57:08 +0100 Subject: [PATCH 07/16] add clone proto files --- .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 37e0e1114..2bd1847a7 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -543,4 +543,5 @@ Suite Setup [Documentation] Skip If Component Is Not Enabled kserve RHOSi Setup - Load Expected Responses \ No newline at end of file + Load Expected Responses + Run git clone https://github.com/IBM/text-generation-inference/ \ No newline at end of file From c1e879a892af025cf8038abcf7e652b0cdf1c107 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 12:17:49 +0100 Subject: [PATCH 08/16] fix ODS-2377 --- ods_ci/tests/Resources/CLI/ModelServing/llm.resource | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource index 5f206d161..1ebb5ebc9 100644 --- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource +++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource @@ -273,12 +273,12 @@ Query Model Multiple Times Run Keyword And Continue On Failure Status Should Be 200 END Log ${res} - ${response_container_field}= Set Variable ${runtime_details}[response_fields_map][response] - IF "${response_container_field}" != "${EMPTY}" - # runtimes may support multiple queries per time. 
Here forcing to use only 1 for sake of simplicity. - ${res}= Set Variable ${res}[${response_container_field}][0] - END IF ${validate_response} == ${TRUE} + ${response_container_field}= Set Variable ${runtime_details}[response_fields_map][response] + IF "${response_container_field}" != "${EMPTY}" + # runtimes may support multiple queries per time. Here forcing to use only 1 for sake of simplicity. + ${res}= Set Variable ${res}[${response_container_field}][0] + END Run Keyword And Continue On Failure ... Model Response Should Match The Expectation model_response=${res} model_name=${model_name} ... runtime_details=${runtime_details} runtime=${runtime} From 473df0cd046a8c2e97967923bd52a43a977a4cc7 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 12:18:30 +0100 Subject: [PATCH 09/16] fix ODS-2371 --- .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 1 + 1 file changed, 1 insertion(+) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 2bd1847a7..769304939 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -65,6 +65,7 @@ Verify User Can Deploy Multiple Models In The Same Namespace Compile Inference Service YAML isvc_name=${model_two_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} ... model_storage_uri=${FLAN_STORAGE_URI} + ... model_format=pytorch serving_runtime=${TGIS_RUNTIME_NAME} Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} ... 
namespace=${test_namespace} Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_one_name} From a4d908e9b5ea4aa9e76ad483acb19eab91a5611c Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 12:50:41 +0100 Subject: [PATCH 10/16] change project name --- .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 769304939..10aa77eba 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -18,7 +18,7 @@ ${FLAN_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_MODEL_S3_DIR} ${FLAN_GRAMMAR_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_GRAMMAR_MODEL_S3_DIR}/artifacts ${FLAN_LARGE_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_LARGE_MODEL_S3_DIR}/artifacts ${BLOOM_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${BLOOM_MODEL_S3_DIR}/artifacts -${TEST_NS}= tgis-standalone2 +${TEST_NS}= tgis-standalone ${TGIS_RUNTIME_NAME}= tgis-runtime From 51567b1261d809038beaf0abac0d52feb87acf87 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 17:09:35 +0100 Subject: [PATCH 11/16] fix ODS-2380 --- .../Resources/CLI/ModelServing/llm.resource | 18 +++++++++++------- .../LLMs/422__model_serving_llm.robot | 8 ++++---- .../LLMs/422__model_serving_llm_tgis.robot | 7 ++++--- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource index 1ebb5ebc9..8616a328c 100644 --- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource +++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource @@ -307,24 +307,28 @@ Compile Deploy And 
Query LLM model ... validate_response=${validate_response} protocol=${protocol} ... runtime=${runtime} inference_type=${inference_type} -Upgrade Caikit Runtime Image +Upgrade Runtime Image [Documentation] Replaces the image URL of the Caikit Runtim with the given ... ${new_image_url} - [Arguments] ${new_image_url} ${namespace} + [Arguments] ${new_image_url} ${namespace} ${container} ${runtime} + ${rc} ${container_idx}= Run And Return Rc And Output + ... oc get ServingRuntime/${runtime} -n ${namespace} -o json | jq '.spec.containers | map(.name == "${container}") | index(true)' # robocop: disable ${rc} ${out}= Run And Return Rc And Output - ... oc patch ServingRuntime caikit-tgis-runtime -n ${namespace} --type=json -p="[{'op': 'replace', 'path': '/spec/containers/0/image', 'value': '${new_image_url}'}]" # robocop: disable + ... oc patch ServingRuntime ${runtime} -n ${namespace} --type=json -p="[{'op': 'replace', 'path': '/spec/containers/${container_idx}/image', 'value': '${new_image_url}'}]" # robocop: disable Should Be Equal As Integers ${rc} ${0} Get Model Pods Creation Date And Image URL [Documentation] Fetches the creation date and the caikit runtime image URL. ... Useful in upgrade scenarios - [Arguments] ${model_name} ${namespace} + [Arguments] ${model_name} ${namespace} ${container} ${created_at}= Oc Get kind=Pod label_selector=serving.kserve.io/inferenceservice=${model_name} ... namespace=${namespace} fields=["metadata.creationTimestamp"] - ${rc} ${caikitsha}= Run And Return Rc And Output - ... oc get pod --selector serving.kserve.io/inferenceservice=${model_name} -n ${namespace} -ojson | jq '.items[].spec.containers[].image' | grep caikit-tgis # robocop: disable + ${rc} ${container_idx}= Run And Return Rc And Output + ... 
oc get pod --selector serving.kserve.io/inferenceservice=${model_name} -n ${namespace} -o json | jq '.items[].spec.containers | map(.name == "${container}") | index(true)' # robocop: disable + ${rc} ${image_url}= Run And Return Rc And Output + ... oc get pod --selector serving.kserve.io/inferenceservice=${model_name} -n ${namespace} -ojson | jq '.items[].spec.containers[${container_idx}].image' # robocop: disable Should Be Equal As Integers ${rc} ${0} - RETURN ${created_at} ${caikitsha} + RETURN ${created_at} ${image_url} User Can Fetch Number Of Requests Over Defined Time [Documentation] Fetches the `tgi_request_count` metric and checks that it reports the expected diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot index 01cb848d0..cac0a58c0 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot @@ -465,14 +465,14 @@ Verify Runtime Upgrade Does Not Affect Deployed Models ... inference_type=all-tokens n_times=1 ... namespace=${test_namespace} ${created_at} ${caikitsha}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} - ... namespace=${test_namespace} - Upgrade Caikit Runtime Image new_image_url=quay.io/opendatahub/caikit-tgis-serving:stable - ... namespace=${test_namespace} + ... namespace=${test_namespace} container=transformer-container + Upgrade Runtime Image new_image_url=quay.io/opendatahub/caikit-tgis-serving:stable + ... namespace=${test_namespace} container=transformer-container runtime=caikit-tgis-runtime Sleep 5s reason=Sleep, in case the runtime upgrade takes some time to start performing actions on the pods... Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} ... 
namespace=${test_namespace} exp_replicas=1 ${created_at_after} ${caikitsha_after}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} - ... namespace=${test_namespace} + ... namespace=${test_namespace} container=transformer-container Should Be Equal ${created_at} ${created_at_after} Should Be Equal As Strings ${caikitsha} ${caikitsha_after} [Teardown] Clean Up Test Project test_ns=${test_namespace} diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 10aa77eba..21a55bf84 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -454,14 +454,15 @@ Verify Runtime Upgrade Does Not Affect Deployed Models ... inference_type=all-tokens n_times=1 ... namespace=${test_namespace} ${created_at} ${caikitsha}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} - ... namespace=${test_namespace} - Upgrade Caikit Runtime Image new_image_url=quay.io/opendatahub/caikit-tgis-serving:stable + ... namespace=${test_namespace} container=kserve-container + Upgrade Runtime Image container=kserve-container runtime=${TGIS_RUNTIME_NAME} + ... new_image_url=quay.io/modh/text-generation-inference:fast ... namespace=${test_namespace} Sleep 5s reason=Sleep, in case the runtime upgrade takes some time to start performing actions on the pods... Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} ... namespace=${test_namespace} exp_replicas=1 ${created_at_after} ${caikitsha_after}= Get Model Pods Creation Date And Image URL model_name=${flan_model_name} - ... namespace=${test_namespace} + ... 
namespace=${test_namespace} container=kserve-container Should Be Equal ${created_at} ${created_at_after} Should Be Equal As Strings ${caikitsha} ${caikitsha_after} [Teardown] Clean Up Test Project test_ns=${test_namespace} From 1477b974e09a4d80dd43c6a2af760258d1139922 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 17:48:08 +0100 Subject: [PATCH 12/16] fix ODS-2372 --- ods_ci/tests/Resources/CLI/ModelServing/llm.resource | 3 ++- .../420__model_serving/LLMs/422__model_serving_llm.robot | 2 ++ .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource index 8616a328c..601bc60e0 100644 --- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource +++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource @@ -457,6 +457,7 @@ Traffic Should Be Redirected Based On Canary Percentage ... matches the expected probability ${exp_percentage}. ... It applies an arbitrary toleration margin of ${toleration} [Arguments] ${exp_percentage} ${isvc_name} ${model_name} ${namespace} + ... ${model_format} ${runtime} ${total}= Set Variable ${20} ${hits}= Set Variable ${0} ${toleration}= Set Variable ${20} @@ -464,7 +465,7 @@ Traffic Should Be Redirected Based On Canary Percentage Log ${counter} ${status}= Run Keyword And Return Status ... Query Model Multiple Times isvc_name=${isvc_name} model_name=${model_name} n_times=1 - ... namespace=${namespace} + ... 
namespace=${namespace} model_format=${model_format} runtime=${runtime} IF ${status} == ${TRUE} ${hits}= Evaluate ${hits}+1 END diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot index cac0a58c0..633a37858 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot @@ -170,6 +170,7 @@ Verify Model Upgrade Using Canaray Rollout ... n_queries=${0} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + ... model_format=caikit runtime=caikit-tgi-runtime Log To Console Remove Canary Tarffic For Model Upgrade Compile Deploy And Query LLM model isvc_name=${isvc_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} @@ -178,6 +179,7 @@ Verify Model Upgrade Using Canaray Rollout ... namespace=${test_namespace} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + ... model_format=caikit runtime=caikit-tgi-runtime [Teardown] Clean Up Test Project test_ns=${test_namespace} ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE} diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 21a55bf84..185259568 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -149,14 +149,17 @@ Verify Model Upgrade Using Canaray Rollout ... 
model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Log To Console Remove Canary Tarffic For Model Upgrade Compile Deploy And Query LLM model isvc_name=${isvc_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} ... model_name=${model_name} ... model_storage_uri=${BLOOM_STORAGE_URI} ... namespace=${test_namespace} + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} + ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} [Teardown] Clean Up Test Project test_ns=${test_namespace} ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE} From d57e21fd6fe27c500057c095099c5b6fb3311568 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Tue, 20 Feb 2024 17:48:36 +0100 Subject: [PATCH 13/16] remove auto bug label from ODS-2372 --- .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 1 - 1 file changed, 1 deletion(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 185259568..8e8a79853 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -122,7 +122,6 @@ Verify User Can Deploy Multiple Models In Different Namespaces Verify Model Upgrade Using Canaray Rollout [Documentation] Checks if user can apply Canary Rollout as deployment strategy [Tags] Tier1 ODS-2372 - ... 
AutomationBug [Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=canary-model-upgrade ${test_namespace}= Set Variable canary-model-upgrade ${isvc_name}= Set Variable canary-caikit From 7778cf425aa004a59d7ebe2b449f591cd14d85ad Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 21 Feb 2024 10:11:44 +0100 Subject: [PATCH 14/16] remove wrong arg from canary kw --- ods_ci/tests/Resources/CLI/ModelServing/llm.resource | 4 ++-- .../420__model_serving/LLMs/422__model_serving_llm.robot | 4 ++-- .../420__model_serving/LLMs/422__model_serving_llm_tgis.robot | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource index 601bc60e0..fab884ac6 100644 --- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource +++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource @@ -457,7 +457,7 @@ Traffic Should Be Redirected Based On Canary Percentage ... matches the expected probability ${exp_percentage}. ... It applies an arbitrary toleration margin of ${toleration} [Arguments] ${exp_percentage} ${isvc_name} ${model_name} ${namespace} - ... ${model_format} ${runtime} + ... ${runtime} ${total}= Set Variable ${20} ${hits}= Set Variable ${0} ${toleration}= Set Variable ${20} @@ -465,7 +465,7 @@ Traffic Should Be Redirected Based On Canary Percentage Log ${counter} ${status}= Run Keyword And Return Status ... Query Model Multiple Times isvc_name=${isvc_name} model_name=${model_name} n_times=1 - ... namespace=${namespace} model_format=${model_format} runtime=${runtime} + ... 
namespace=${namespace} runtime=${runtime} IF ${status} == ${TRUE} ${hits}= Evaluate ${hits}+1 END diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot index 633a37858..9dcb4df82 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot @@ -170,7 +170,7 @@ Verify Model Upgrade Using Canaray Rollout ... n_queries=${0} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... model_format=caikit runtime=caikit-tgi-runtime + ... runtime=caikit-tgi-runtime Log To Console Remove Canary Tarffic For Model Upgrade Compile Deploy And Query LLM model isvc_name=${isvc_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} @@ -179,7 +179,7 @@ Verify Model Upgrade Using Canaray Rollout ... namespace=${test_namespace} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... model_format=caikit runtime=caikit-tgi-runtime + ... runtime=caikit-tgi-runtime [Teardown] Clean Up Test Project test_ns=${test_namespace} ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE} diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 8e8a79853..71873250b 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -148,7 +148,7 @@ Verify Model Upgrade Using Canaray Rollout ... 
model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} + ... runtime=${TGIS_RUNTIME_NAME} Log To Console Remove Canary Tarffic For Model Upgrade Compile Deploy And Query LLM model isvc_name=${isvc_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} @@ -158,7 +158,7 @@ Verify Model Upgrade Using Canaray Rollout ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... model_format=pytorch runtime=${TGIS_RUNTIME_NAME} + ... runtime=${TGIS_RUNTIME_NAME} [Teardown] Clean Up Test Project test_ns=${test_namespace} ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE} From 6123d9da2a56fa9ab49c81b5c1c653621bf2e585 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 21 Feb 2024 10:19:26 +0100 Subject: [PATCH 15/16] change metrics check for tgis ODS-2401 --- .../LLMs/422__model_serving_llm_tgis.robot | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot index 71873250b..08f353070 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot @@ -20,6 +20,7 @@ ${FLAN_LARGE_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_LARGE_MODEL_S3_DIR ${BLOOM_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${BLOOM_MODEL_S3_DIR}/artifacts ${TEST_NS}= tgis-standalone ${TGIS_RUNTIME_NAME}= tgis-runtime +@{SEARCH_METRICS}= tgi_ istio_ *** Test Cases *** @@ -490,9 +491,11 @@ 
Verify User Can Access Model Metrics From UWM Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} ... namespace=${test_namespace} Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name} - ... namespace=${test_namespace} + ... namespace=${test_namespace} Wait Until Keyword Succeeds 30 times 4s - ... TGI Caikit And Istio Metrics Should Exist thanos_url=${thanos_url} thanos_token=${token} + ... Metrics Should Exist In UserWorkloadMonitoring + ... thanos_url=${thanos_url} thanos_token=${token} + ... search_metrics=${SEARCH_METRICS} Query Model Multiple Times model_name=${flan_model_name} runtime=${TGIS_RUNTIME_NAME} ... inference_type=all-tokens n_times=3 ... namespace=${test_namespace} From 066c8ba3bb6d857ae4d5bef49725a99c67847968 Mon Sep 17 00:00:00 2001 From: bdattoma Date: Wed, 21 Feb 2024 12:33:16 +0100 Subject: [PATCH 16/16] fix typo --- .../420__model_serving/LLMs/422__model_serving_llm.robot | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot index 9dcb4df82..9f1122d48 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot @@ -170,7 +170,7 @@ Verify Model Upgrade Using Canaray Rollout ... n_queries=${0} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${canary_percentage} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... runtime=caikit-tgi-runtime + ... runtime=caikit-tgis-runtime Log To Console Remove Canary Tarffic For Model Upgrade Compile Deploy And Query LLM model isvc_name=${isvc_name} ... sa_name=${DEFAULT_BUCKET_SA_NAME} @@ -179,7 +179,7 @@ Verify Model Upgrade Using Canaray Rollout ... 
namespace=${test_namespace} Traffic Should Be Redirected Based On Canary Percentage exp_percentage=${100} ... isvc_name=${isvc_name} model_name=${model_name} namespace=${test_namespace} - ... runtime=caikit-tgi-runtime + ... runtime=caikit-tgis-runtime [Teardown] Clean Up Test Project test_ns=${test_namespace} ... isvc_names=${isvcs_names} wait_prj_deletion=${FALSE}