red-hat-data-services · tarukumar · Mar 8, 2024 · Mar 7, 2024 · Mar 7, 2024 · Mar 7, 2024
diff --git a/ods_ci/tests/Resources/Files/llm/model_expected_responses.json b/ods_ci/tests/Resources/Files/llm/model_expected_responses.json
@@ -80,6 +80,16 @@
 
                 }
             }
+        },
+        {
+            "query_text": "write a python function to print the numbers from 1 to 10",
+            "models": {
+                "codellama-34b-instruct-hf": {
+                    "response_tokens": 20,
+                    "response_text": "0.\n\n\ndef print_numbers(n):\n    for i in range(1",
+                    "streamed_response_text": "{'inputTokenCount':16}{'generatedTokenCount':2,'text':'0'}{'generatedTokenCount':3,'text':'.'}{'generatedTokenCount':4,'text':''}{'generatedTokenCount':5,'text':''}{'generatedTokenCount':6,'text':'de'}{'generatedTokenCount':7,'text':'fprin'}{'generatedTokenCount':8,'text':'t'}{'generatedTokenCount':9,'text':'_number'}{'generatedTokenCount':10,'text':'s'}{'generatedTokenCount':11,'text':'('}{'generatedTokenCount':12,'text':'n)'}{'generatedTokenCount':13,'text':':'}{'generatedTokenCount':14,'text':''}{'generatedTokenCount':15,'text':'fo'}{'generatedTokenCount':16,'text':'r'}{'generatedTokenCount':17,'text':'ii'}{'generatedTokenCount':18,'text':'nrang'}{'generatedTokenCount':19,'text':'e'}{'generatedTokenCount':20,'text':'(1','stopReason':'MAX_TOKENS'}"
+                }
+            }
         }
     ]
 }
diff --git a/...ests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_models.robot b/...ests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_models.robot
@@ -233,6 +233,42 @@
     ...    AND
     ...    Run Keyword If    "${KSERVE_MODE}"=="RawDeployment"    Terminate Process    llm-query-process    kill=true
 
+Verify User Can Serve And Query A codellama/codellama-34b-instruct-hf Model
+    [Documentation]    Basic tests for preparing, deploying and querying a LLM model
+    ...                using Kserve and TGIS runtime
+    [Tags]    Tier1    RHOAIENG-4200
+    # NOTE(review): ${model_name}, ${model_path}, ${storage_uri}, ${limits},
+    # ${use_port_forwarding} and ${models_names} are used below but never assigned
+    # in this test; presumably `Setup Test Variables` sets them - confirm against
+    # that keyword's definition.
+    Setup Test Variables    model_name=codellama-34b-instruct-hf    use_pvc=${USE_PVC}    use_gpu=${USE_GPU}
+    ...    kserve_mode=${KSERVE_MODE}   model_path=codellama-34b-instruct-hf
+    # Dedicated namespace for this model; the same value is passed to every
+    # step below and to the teardown clean-up.
+    ${test_namespace}=   Set Variable    codellama-34b
+    # Prepare the project and the TGIS runtime, asking for 80Gi of storage.
+    Set Project And Runtime    runtime=${TGIS_RUNTIME_NAME}     namespace=${test_namespace}
+    ...    download_in_pvc=${DOWNLOAD_IN_PVC}    model_name=${model_name}
+    ...    storage_size=80Gi   model_path=${model_path}
+    # Resource requests passed to the InferenceService: 130Gi of memory.
+    ${requests}=    Create Dictionary    memory=130Gi
+    # Compile the InferenceService YAML with no service account (sa_name is empty)
+    # and the pytorch model format.
+    Compile Inference Service YAML    isvc_name=${model_name}
+    ...    sa_name=${EMPTY}
+    ...    model_storage_uri=${storage_uri}
+    ...    model_format=pytorch    serving_runtime=${TGIS_RUNTIME_NAME}
+    ...    limits_dict=${limits}    requests_dict=${requests}    kserve_mode=${KSERVE_MODE}
+    # Deploy from ${INFERENCESERVICE_FILLED_FILEPATH}; presumably that file is
+    # produced by the compile step above - TODO confirm.
+    Deploy Model Via CLI    isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH}
+    ...    namespace=${test_namespace}
+    # Wait up to 3000s for the pods labelled with this InferenceService name.
+    Wait For Pods To Be Ready    label_selector=serving.kserve.io/inferenceservice=${model_name}
+    ...    namespace=${test_namespace}    timeout=3000s
+    # Port-forwarding is started only when KSERVE_MODE is RawDeployment.
+    Run Keyword If    "${KSERVE_MODE}"=="RawDeployment"
+    ...    Start Port-forwarding    namespace=${test_namespace}    model_name=${model_name}
+    # First query: all-tokens inference over gRPC, run once, with response validation on.
+    # NOTE(review): query_idx=5 presumably selects the matching entry in
+    # model_expected_responses.json - verify the index against that file.
+    Query Model Multiple Times    model_name=${model_name}    runtime=${TGIS_RUNTIME_NAME}
+    ...    inference_type=all-tokens    n_times=1    protocol=grpc
+    ...    namespace=${test_namespace}   query_idx=5   validate_response=${TRUE}
+    ...    port_forwarding=${use_port_forwarding}
+    # Second query: same query index in streaming mode, with response validation off.
+    Query Model Multiple Times    model_name=${model_name}    runtime=${TGIS_RUNTIME_NAME}
+    ...    inference_type=streaming    n_times=1    protocol=grpc
+    ...    namespace=${test_namespace}    query_idx=5    validate_response=${FALSE}
+    ...    port_forwarding=${use_port_forwarding}
+    # Teardown: clean up the test project without waiting for its deletion, then,
+    # in RawDeployment mode only, kill the process named llm-query-process
+    # (presumably the port-forward started above - confirm).
+    [Teardown]    Run Keywords
+    ...    Clean Up Test Project    test_ns=${test_namespace}
+    ...    isvc_names=${models_names}    wait_prj_deletion=${FALSE}
+    ...    AND
+    ...    Run Keyword If    "${KSERVE_MODE}"=="RawDeployment"    Terminate Process    llm-query-process    kill=true
+
 
 *** Keywords ***
 Suite Setup