Skip to content

Commit

Permalink
Backport required changes from master to releases/2.8.0 (#1318)
Browse files Browse the repository at this point in the history
This PR contains the following fixes from the `master` branch:

* edafbcc (#1296)
* 5e7f4b1 (#1302)
* b6fb7f7 (#1307)
* 9485246 (#1314)
* abfb8a5 (#1316)
* fc76761 (#1317)
* 439e2b3 (#1306)
* 5db9e26 (#1324)

---

I kept the order in which these commits were merged into the `master`
branch and provided both a link to each particular commit and a link to
the corresponding PR, so it's easier to understand later on.
  • Loading branch information
jstourac authored Mar 28, 2024
2 parents 1ce419d + 1ef8bb5 commit 4088985
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 76 deletions.
20 changes: 11 additions & 9 deletions ods_ci/tasks/Resources/Provisioning/Hive/OSP/create_fips.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ export BASE_DOMAIN=${2:-$BASE_DOMAIN}
export OSP_NETWORK=${3:-$OSP_NETWORK}
export OSP_CLOUD=${4:-openstack}
export OUTPUT_DIR=${5:-.${CLUSTER_NAME}_conf}
export AWS_ACCESS_KEY_ID=${6:-$AWS_ACCESS_KEY_ID}
export AWS_SECRET_ACCESS_KEY=${7:-$AWS_SECRET_ACCESS_KEY}

# Cluster name should be converted to lowercase
export CLUSTER_NAME=${CLUSTER_NAME,,}
export CLUSTER_NAME=${CLUSTER_NAME,,}

if [[ -z $CLUSTER_NAME || -z $BASE_DOMAIN || -z $OSP_NETWORK ]] ; then
echo -e "Some global variables are missing, for example:
# export CLUSTER_NAME=${CLUSTER_NAME:-"rhods-qe-007"} # To set the cluster Subdomain (A Record) in AWS.
echo -e "Some global variables are missing, for example:
# export CLUSTER_NAME=${CLUSTER_NAME:-"rhods-qe-007"} # To set the cluster Subdomain (A Record) in AWS.
# export BASE_DOMAIN=${BASE_DOMAIN:-"rhods.ccitredhat.com"} # To set the cluster Domain in AWS.
# export OSP_NETWORK=${OSP_NETWORK:-"shared_net_5"} # The external network for the new Floating IPs on OSP.
"
Expand Down Expand Up @@ -88,11 +90,11 @@ fi

echo "Updating DNS records (cluster api's) in AWS Route53"
RESPONSE=$(aws route53 change-resource-record-sets --hosted-zone-id "$ZONE_ID" --change-batch \
'{ "Comment": "Update A record for cluster API", "Changes":
[ { "Action": "UPSERT", "ResourceRecordSet": { "Name": "api.'"$CLUSTER_NAME"'.'"$BASE_DOMAIN"'",
'{ "Comment": "Update A record for cluster API", "Changes":
[ { "Action": "UPSERT", "ResourceRecordSet": { "Name": "api.'"$CLUSTER_NAME"'.'"$BASE_DOMAIN"'",
"Type": "A", "TTL": 300, "ResourceRecords": [ { "Value": "'"$FIP_API"'" } ] } } ] }' --output json) || rc=$?
if [[ -n "$rc" ]] ; then
echo -e "Failed to update DNS A record in AWS for cluster API.
echo -e "Failed to update DNS A record in AWS for cluster API.
\n Releasing previously allocated floating IP in $OS_CLOUD ($FIP_API)"
openstack floating ip delete "$FIP_API"
exit ${rc:+$rc}
Expand All @@ -103,12 +105,12 @@ aws route53 wait resource-record-sets-changed --id "$(echo "$RESPONSE" | jq -r '

echo "Updating DNS records (cluster ingress) in AWS Route53"
RESPONSE=$(aws route53 change-resource-record-sets --hosted-zone-id "$ZONE_ID" --change-batch \
'{ "Comment": "Update A record for cluster APPS", "Changes":
[ { "Action": "UPSERT", "ResourceRecordSet": { "Name": "*.apps.'"$CLUSTER_NAME"'.'"$BASE_DOMAIN"'",
'{ "Comment": "Update A record for cluster APPS", "Changes":
[ { "Action": "UPSERT", "ResourceRecordSet": { "Name": "*.apps.'"$CLUSTER_NAME"'.'"$BASE_DOMAIN"'",
"Type": "A", "TTL": 300, "ResourceRecords": [ { "Value": "'"$FIP_APPS"'" } ] } } ] }' --output json) || rc=$?

if [[ -n "$rc" ]] ; then
echo -e "Failed to update DNS A record in AWS for cluster APPS.
echo -e "Failed to update DNS A record in AWS for cluster APPS.
\n Releasing previously allocated floating IP in $OS_CLOUD ($FIP_APPS)"
openstack floating ip delete "$FIP_APPS"
exit ${rc:+$rc}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ items:
name: ${infrastructure_configurations}[hive_cluster_name]-sec # see line 30
size: 1
maxSize: 1
maxConcurrent: 1
runningCount: 1
skipMachinePools: true
- apiVersion: v1
Expand Down Expand Up @@ -61,7 +60,7 @@ items:
platform:
openstack:
cloud: ${infrastructure_configurations}[osp_cloud_name]
computeFlavor: m1.large
computeFlavor: g.standard.xxl
externalDNS: null
externalNetwork: ${infrastructure_configurations}[osp_network]
pullSecret: '${infrastructure_configurations}[quay_pull_sec]'
Expand Down Expand Up @@ -100,4 +99,4 @@ items:
name: ${infrastructure_configurations}[image_set]
namespace: ${infrastructure_configurations}[hive_claim_ns]
spec:
releaseImage: quay.io/openshift-release-dev/ocp-release:${infrastructure_configurations}[ocp_version]-x86_64
releaseImage: quay.io/openshift-release-dev/ocp-release:${infrastructure_configurations}[ocp_version]-x86_64
38 changes: 16 additions & 22 deletions ods_ci/tasks/Resources/Provisioning/Hive/provision.robot
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ Create Floating IPs
${shell_script} = Catenate
... ${CURDIR}/OSP/create_fips.sh ${cluster_name} ${infrastructure_configurations}[base_domain]
... ${infrastructure_configurations}[osp_network] ${infrastructure_configurations}[osp_cloud_name] ${artifacts_dir}/
... ${infrastructure_configurations}[AWS_ACCESS_KEY_ID] ${infrastructure_configurations}[AWS_SECRET_ACCESS_KEY]
${return_code} = Run and Watch Command ${shell_script} output_should_contain=Exporting Floating IPs
Should Be Equal As Integers ${return_code} 0 msg=Error creating floating IPs for cluster '${cluster_name}'
${fips_file_to_export} = Set Variable
Expand Down Expand Up @@ -145,6 +146,7 @@ Watch Hive Install Log

Wait For Cluster To Be Ready
${pool_namespace} = Get Cluster Pool Namespace ${pool_name}
Set Task Variable ${pool_namespace}
Log Watching Hive Pool namespace: ${pool_namespace} console=True
${install_log_file} = Set Variable ${artifacts_dir}/${cluster_name}_install.log
Create File ${install_log_file}
Expand All @@ -157,7 +159,7 @@ Wait For Cluster To Be Ready
... oc -n ${pool_namespace} get cd ${pool_namespace} -o json | jq -r '.status.webConsoleURL' --exit-status
... shell=yes
${claim_status} = Run Process
... oc -n ${hive_namespace} wait --for\=condition\=ClusterRunning\=True clusterclaim ${claim_name} --timeout\=10m shell=yes # robocop: disable:line-too-long
... oc -n ${hive_namespace} wait --for\=condition\=ClusterRunning\=True clusterclaim ${claim_name} --timeout\=15m shell=yes # robocop: disable:line-too-long
# Workaround for old Hive with Openstack - Cluster is displayed as Resuming even when it is Running
# add also support to the new Hive where the Cluster is displayed as Running
IF "${provider_type}" == "OSP"
Expand All @@ -177,28 +179,29 @@ Wait For Cluster To Be Ready

Save Cluster Credentials
Set Task Variable ${cluster_details} ${artifacts_dir}/${cluster_name}_details.txt
Set Task Variable ${cluster_kubeconf} ${artifacts_dir}/kubeconfig
${pool_namespace} = Get Cluster Pool Namespace ${pool_name}
${result} = Run Process oc -n ${pool_namespace} get cd ${pool_namespace} -o json | jq -r '.status.apiURL' --exit-status shell=yes
Should Be True ${result.rc} == 0 Hive Cluster deployment '${pool_namespace}' does not have a valid API access
${result} = Run Process
... oc -n ${pool_namespace} get cd ${pool_namespace} -o json | jq -r '.status.webConsoleURL' --exit-status
... shell=yes
Should Be True ${result.rc} == 0
... Hive Cluster deployment '${pool_namespace}' does not have a valid webConsoleURL access
Create File ${cluster_details} console=${result.stdout}\n
${ClusterDeployment} = Oc Get kind=ClusterDeployment name=${pool_namespace}
... namespace=${pool_namespace} api_version=hive.openshift.io/v1
${apiURL} = Set Variable "${ClusterDeployment[0]['status']['apiURL']}"
Append to File ${cluster_details} api=${apiURL}\n
${result} = Run Process
... oc -n ${pool_namespace} get cd ${pool_namespace} -o json | jq -r '.status.apiURL' --exit-status
... shell=yes
Append To File ${cluster_details} api=${result.stdout}\n
${result} = Run Process oc extract -n ${pool_namespace} --confirm secret/$(oc -n ${pool_namespace} get cd ${pool_namespace} -o jsonpath\='{.spec.clusterMetadata.adminPasswordSecretRef.name}') --to\=${artifacts_dir}
... shell=yes
Should Be True ${result.rc} == 0
${username} = Get File ${artifacts_dir}/username
${password} = Get File ${artifacts_dir}/password
Append to File ${cluster_details} username=${username}\n
Append to File ${cluster_details} password=${password}\n
Append To File ${cluster_details} username=${username}\n
Append To File ${cluster_details} password=${password}\n
${result} = Run Process oc extract -n ${pool_namespace} --confirm secret/$(oc -n ${pool_namespace} get cd ${pool_namespace} -o jsonpath\='{.spec.clusterMetadata.adminKubeconfigSecretRef.name}') --to\=${artifacts_dir}
... shell=yes
Should Be True ${result.rc} == 0
RETURN ${cluster_kubeconf}

Login To Cluster
${cluster_kubeconf} = Set Variable ${artifacts_dir}/kubeconfig
Export Variables From File ${cluster_details}
Create File ${cluster_kubeconf}
# Test the extracted credentials
Expand All @@ -209,18 +212,9 @@ Login To Cluster
Log ${result.stdout}\n${result.stderr} console=True
Should Be True ${result.rc} == 0

Set Cluster Storage
Log Update Cluster ${cluster_name} Storage Class console=True
${result} = Run Process oc --kubeconfig\=${cluster_kubeconf} patch StorageClass standard -p '{"metadata": {"annotations": {"storageclass.kubernetes.io/is-default-class": "false"}}}'
... shell=yes
Log StorageClass standard:\n${result.stdout}\n${result.stderr} console=True
${result} = Run Process oc --kubeconfig\=${cluster_kubeconf} patch StorageClass standard-csi -p '{"metadata": {"annotations": {"storageclass.kubernetes.io/is-default-class": "true"}}}'
... shell=yes
Log StorageClass standard-csi:\n${result.stdout}\n${result.stderr} console=True
Run Keyword And Ignore Error Should Be True ${result.rc} == 0

Get Cluster Pool Namespace
[Arguments] ${hive_pool_name}
Log Cluster pool name is: ${hive_pool_name} console=True
${namespace} = Wait Until Keyword Succeeds 2 min 2 s
... Oc Get kind=Namespace label_selector=hive.openshift.io/cluster-pool-name=${hive_pool_name}
${pool_namespace} = Set Variable ${namespace[0]['metadata']['name']}
Expand Down
3 changes: 1 addition & 2 deletions ods_ci/tasks/Tasks/provision_self_managed_cluster.robot
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ Provision Self-Managed Cluster
Wait For Cluster To Be Ready
Save Cluster Credentials
Login To Cluster
Set Cluster Storage
Pass Execution Self-Managed Cluster ${cluster_name} provisionend successfully

Deprovision Self-Managed Cluster
Expand All @@ -55,4 +54,4 @@ Delete GPU Node From Self-Managed AWS Cluster
Disconnect Self-Managed Cluster
[Documentation] Disconnect a self-managed cluster
[Tags] self_managed_disconnect
Disconnect Cluster
Disconnect Cluster
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Perform Request
Remove OAuth Token From Header headers=${response.headers} token=${request_args}[headers][Cookie]
Set Log Level INFO
Set To Dictionary ${LOG_DICT} url=${response.request.url} headers=${response.request.headers}
... body=${response.request.headers} status_code=${response.status_code}
... body=${response.request.body} status_code=${response.status_code}
Log ${request_type} Request: ${LOG_DICT}
Set To Dictionary ${LOG_RESP_DICT} url=${response.url} headers=${response.headers} body=${response.text}
... status_code=${response.status_code} reason=${response.reason}
Expand Down
21 changes: 12 additions & 9 deletions ods_ci/tests/Tests/400__ods_dashboard/414__ods_dashboard_api.robot
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
*** Settings ***
Documentation Suite for a basic security test of Dashboard APIs. The tests verifies that user
... reach endpoints based on their user permissions
... reach endpoints based on their user permissions.
... Refer to this file https://github.com/opendatahub-io/odh-dashboard/blob/main/frontend/src/types.ts
... to read particular API definitions.
Library OpenShiftLibrary
Library SeleniumLibrary
Resource ../../Resources/Common.robot
Expand Down Expand Up @@ -72,8 +74,8 @@ ${VALIDATE_ISV_RESULT_ENDPOINT}= api/validate-isv/results?appName=anacon
${NB_ENDPOINT_PT0}= api/notebooks
${NB_ENDPOINT_PT1}= ${NB_ENDPOINT_PT0}/${NOTEBOOK_NS}/
${NB_ENDPOINT_PT2}= /status
${NB_ENDPOINT_BODY_A}= {"notebookSizeName":"Small","imageName":"s2i-minimal-notebook","imageTagName":"<IMAGETAGNAME>","url":"${ODH_DASHBOARD_URL}","gpus":0,"envVars":{"configMap":{},"secrets":{"super-secre":"my new secret 20!"}},"state":"started"}
${NB_ENDPOINT_BODY_B}= {"notebookSizeName":"Small","imageName":"s2i-minimal-notebook","imageTagName":"<IMAGETAGNAME>","url":"${ODH_DASHBOARD_URL}","gpus":0,"envVars":{"configMap":{},"secrets":{"super-secre":"my new secret 20!"}},"state":"started","username":"<USERNAME>"}
${NB_ENDPOINT_BODY_A}= {"notebookSizeName":"Small","imageName":"s2i-minimal-notebook","imageTagName":"<IMAGETAGNAME>","acceleratorProfile": {"count": 0},"envVars":{"configMap":{},"secrets":{"super-secret":"my new secret 20!"}},"state":"started"} #robocop: disable:line-too-long
${NB_ENDPOINT_BODY_B}= {"notebookSizeName":"Small","imageName":"s2i-minimal-notebook","imageTagName":"<IMAGETAGNAME>","acceleratorProfile": {"count": 0},"envVars":{"configMap":{},"secrets":{"super-secret":"my new secret 20!"}},"state":"started","username":"<USERNAME>"} #robocop: disable:line-too-long
${NB_STOP_ENDPOINT_BODY_A}= {"state":"stopped"}
${NB_STOP_ENDPOINT_BODY_B}= {"state":"stopped","username": "<USERNAME>"}

Expand Down Expand Up @@ -835,21 +837,22 @@ Delete Dummy ConfigMaps
Delete Test Notebooks CRs And PVCs From CLI
[Documentation] Stops all the notebook servers spanwed during a test by
... deleting their CRs. At the end it closes any opened browsers
${CR_1}= Get User CR Notebook Name ${TEST_USER_3.USERNAME}
${CR_2}= Get User CR Notebook Name ${TEST_USER_4.USERNAME}
${test_crs}= Create List ${CR_1} ${CR_2}
FOR ${nb_cr} IN @{test_crs}
${present}= Run Keyword And Return Status OpenshiftLibrary.Oc Get kind=Notebook namespace=${NOTEBOOK_NS} name=${nb_cr}
${test_usernames}= Create List ${TEST_USER.USERNAME} ${TEST_USER_3.USERNAME} ${TEST_USER_4.USERNAME}
FOR ${username} IN @{test_usernames}
${nb_cr}= Get User CR Notebook Name ${username}
${present}= Run Keyword And Return Status
... OpenshiftLibrary.Oc Get kind=Notebook namespace=${NOTEBOOK_NS} name=${nb_cr}
IF ${present} == ${FALSE}
Continue For Loop
ELSE
OpenshiftLibrary.Oc Delete kind=Notebook namespace=${NOTEBOOK_NS} name=${nb_cr}
END
END
Close All Browsers
${PVC_ADMIN_USER}= Get User Notebook PVC Name ${TEST_USER.USERNAME}
${PVC_BASIC_USER}= Get User Notebook PVC Name ${TEST_USER_3.USERNAME}
${PVC_BASIC_USER_2}= Get User Notebook PVC Name ${TEST_USER_4.USERNAME}
${test_pvcs}= Create List ${PVC_BASIC_USER} ${PVC_BASIC_USER_2}
${test_pvcs}= Create List ${PVC_ADMIN_USER} ${PVC_BASIC_USER} ${PVC_BASIC_USER_2}
Delete Test PVCs pvc_names=${test_pvcs}

Set Username In Secret Payload
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ Verify Openvino_IR Model Via UI

Test Inference Without Token Authentication
[Documentation] Test the inference result after having deployed a model that doesn't require Token Authentication
[Tags] Smoke
... Intermittently failing: RHOAIENG-3115
[Tags] Smoke FlakyTest
... ODS-2053
Run Keyword And Continue On Failure Verify Model Inference ${MODEL_NAME} ${INFERENCE_INPUT_OPENVINO}
... ${EXPECTED_INFERENCE_OUTPUT_OPENVINO} token_auth=${FALSE}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ ${BLOOM_MODEL_S3_DIR}= bloom-560m/bloom-560m-caikit
Verify User Can Serve And Query A Model Using The UI
[Documentation] Basic tests for preparing, deploying and querying a LLM model
... using Kserve and Caikit runtime
[Tags] Smoke Tier1 ODS-2519 ODS-2522
... Intermittently failing: RHOAIENG-3148
[Tags] Smoke Tier1 ODS-2519 ODS-2522 FlakyTest
[Setup] Set Up Project namespace=${TEST_NS}
${test_namespace}= Set Variable ${TEST_NS}
${flan_model_name}= Set Variable flan-t5-small-caikit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,9 @@ Resource ../../../tasks/Resources/RHODS_OLM/install/oc_install.robot


*** Variables ***
${KUEUE_DIR} kueue
${KUEUE_REPO_URL} %{KUEUE_REPO_URL=https://github.com/opendatahub-io/kueue.git}
${KUEUE_REPO_BRANCH} %{KUEUE_REPO_BRANCH=dev}
${JOB_GO_BIN} %{WORKSPACE=.}/go-bin
${KUBECONFIG} %{WORKSPACE=.}/kconfig
${WORKER_NODE} ${EMPTY}

${KUEUE_KUBECONFIG} %{HOME}/.kube/config
${WORKER_NODE} ${EMPTY}
${KUEUE_RELEASE_ASSETS} %{KUEUE_RELEASE_ASSETS=https://github.com/opendatahub-io/kueue/releases/latest/download}

*** Test Cases ***
Run E2E test
Expand All @@ -36,11 +32,13 @@ Run Sanity test
*** Keywords ***
Prepare Kueue E2E Test Suite
[Documentation] Prepare Kueue E2E Test Suite
${result} = Run Process git clone -b ${KUEUE_REPO_BRANCH} ${KUEUE_REPO_URL} ${KUEUE_DIR}
... shell=true stderr=STDOUT
Log To Console "Downloading compiled test binary e2e-singlecluster"
${result} = Run Process curl --location --silent --output e2e-singlecluster ${KUEUE_RELEASE_ASSETS}/e2e-singlecluster && chmod +x e2e-singlecluster
... shell=true
... stderr=STDOUT
Log To Console ${result.stdout}
IF ${result.rc} != 0
FAIL Unable to clone kueue repo ${KUEUE_REPO_URL}:${KUEUE_REPO_BRANCH}:${KUEUE_DIR}
FAIL Unable to retrieve e2e-singlecluster compiled binary
END

Enable Component kueue
Expand All @@ -53,19 +51,16 @@ Prepare Kueue E2E Test Suite
${return_code} = Run And Return Rc oc label ${WORKER_NODE} instance-type=on-demand
Should Be Equal As Integers ${return_code} 0 msg=Fail to label worker node with instance-type=on-demand

# Use Go install command to install ginkgo
Log To Console Install ginkgo ...
${result} = Run Process go install github.com/onsi/ginkgo/v2/ginkgo
... shell=true stderr=STDOUT
... env:GOBIN=${JOB_GO_BIN}
... cwd=${KUEUE_DIR}
Teardown Kueue E2E Test Suite
[Documentation] Teardown Kueue E2E Test Suite
Log To Console "Removing test binaries"
${result} = Run Process rm -f e2e-singlecluster
... shell=true
... stderr=STDOUT
Log To Console ${result.stdout}
IF ${result.rc} != 0
FAIL Fail to install ginkgo
FAIL Unable to remove files
END

Teardown Kueue E2E Test Suite
[Documentation] Teardown Kueue E2E Test Suite
Disable Component kueue

# Remove label instance-type=on-demand from worker node
Expand All @@ -77,10 +72,9 @@ Run Kueue E2E Test
[Documentation] Run Kueue E2E Test
[Arguments] ${test_name}
Log To Console Running Kueue E2E test: ${test_name}
${result} = Run Process ginkgo --focus-file\=${test_name} ${KUEUE_DIR}/test/e2e/singlecluster
${result} = Run Process ./e2e-singlecluster -ginkgo.focus-file\=${test_name}
... shell=true stderr=STDOUT
... env:PATH=%{PATH}:${JOB_GO_BIN}
... env:KUBECONFIG=${KUBECONFIG}
... env:KUBECONFIG=${KUEUE_KUBECONFIG}
... env:NAMESPACE=${APPLICATIONS_NAMESPACE}
Log To Console ${result.stdout}
IF ${result.rc} != 0
Expand All @@ -91,10 +85,9 @@ Run Kueue Sanity Test
[Documentation] Run Kueue Sanity Test
[Arguments] ${test_name}
Log To Console Running Kueue Sanity test: ${test_name}
${result} = Run Process ginkgo --focus "${test_name}" ${KUEUE_DIR}/test/e2e/singlecluster
${result} = Run Process ./e2e-singlecluster -ginkgo.focus "${test_name}"
... shell=true stderr=STDOUT
... env:PATH=%{PATH}:${JOB_GO_BIN}
... env:KUBECONFIG=${KUBECONFIG}
... env:KUBECONFIG=${KUEUE_KUBECONFIG}
... env:NAMESPACE=${APPLICATIONS_NAMESPACE}
Log To Console ${result.stdout}
IF ${result.rc} != 0
Expand Down
Loading

0 comments on commit 4088985

Please sign in to comment.