From f6cd436f5901f79ccb50485ec28558be6732d082 Mon Sep 17 00:00:00 2001 From: Max Norton Date: Tue, 18 Feb 2025 13:29:12 +0000 Subject: [PATCH] Incorporate separate StackHPC cloud tests for monitoring and for hosts (#1501) CI: Add Grafana and OpenSearch Dashboards variables for SOT Depends-On: https://github.com/stackhpc/stackhpc-openstack-tests/pull/3 Depends-On: https://github.com/stackhpc/stackhpc-openstack-tests/pull/4 Co-authored-by: Mark Goddard --- .github/workflows/stackhpc-all-in-one.yml | 24 ++-- etc/kayobe/ansible/stackhpc-cloud-tests.yml | 115 ++++++++++++++++++-- etc/kayobe/inventory/group_vars/all/selinux | 5 + 3 files changed, 124 insertions(+), 20 deletions(-) create mode 100644 etc/kayobe/inventory/group_vars/all/selinux diff --git a/.github/workflows/stackhpc-all-in-one.yml b/.github/workflows/stackhpc-all-in-one.yml index f05b8a108..3b01181bf 100644 --- a/.github/workflows/stackhpc-all-in-one.yml +++ b/.github/workflows/stackhpc-all-in-one.yml @@ -468,7 +468,7 @@ jobs: -v $(pwd)/sct-results:/stack/sct-results \ -e KAYOBE_ENVIRONMENT -e KAYOBE_VAULT_PASSWORD -e KAYOBE_AUTOMATION_SSH_PRIVATE_KEY \ $KAYOBE_IMAGE \ - /stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/playbook-run.sh '$KAYOBE_CONFIG_PATH/ansible/stackhpc-cloud-tests.yml' -e sot_version=${{ inputs.stackhpc_cloud_tests_version }} + /stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/playbook-run.sh '$KAYOBE_CONFIG_PATH/ansible/stackhpc-cloud-tests.yml' -e sct_version=${{ inputs.stackhpc_cloud_tests_version }} env: KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }} @@ -496,16 +496,20 @@ jobs: sct-results/ if: ${{ !cancelled() && (steps.tempest.outcome == 'success' || steps.stackhpc-cloud-tests.outcome == 'success' || steps.diagnostics.outcome == 'success') }} - - name: Fail if any Tempest tests failed + - name: Fail if any tests failed run: | - test $(wc -l < tempest-artifacts/failed-tests) -lt 1 - - - name: Fail if any StackHPC Cloud tests failed - run: | - echo "Some StackHPC Cloud tests failed." - echo "See HTML results artifact (sct-results) for details." - exit 1 - if: steps.stackhpc-cloud-tests.outcome == 'failure' + rc=0 + if [[ $(wc -l < tempest-artifacts/failed-tests) -ne 0 ]]; then + echo "Some Tempest tests failed." + echo "See HTML results artifact (tempest-artifacts) for details." + rc=1 + fi + if [[ $(wc -l < sct-results/failed-tests) -ne 0 ]]; then + echo "Some StackHPC Cloud tests failed." + echo "See HTML results artifact (sct-results) for details." + rc=1 + fi + exit $rc - name: Destroy run: terraform destroy -auto-approve diff --git a/etc/kayobe/ansible/stackhpc-cloud-tests.yml b/etc/kayobe/ansible/stackhpc-cloud-tests.yml index 458582080..b2fed995d 100644 --- a/etc/kayobe/ansible/stackhpc-cloud-tests.yml +++ b/etc/kayobe/ansible/stackhpc-cloud-tests.yml @@ -1,17 +1,24 @@ --- - name: Run StackHPC Cloud tests - hosts: tempest_runner + hosts: tempest_runner:overcloud tags: - stackhpc-cloud-tests vars: sct_venv: "{{ virtualenv_path }}/sct-venv" sct_repo: https://github.com/stackhpc/stackhpc-cloud-tests + # Define the version of SCT used for testing, the github workflow overrides this with + # stackhpc_cloud_tests_version so this is only used if running "locally". sct_version: main sct_timeout: 30 results_path_local: "{{ lookup('env', 'HOME') }}/sct-results" tasks: - name: Stackhpc Cloud tests block: + - name: Assert that there is only one host in the tempest_runner group + ansible.builtin.assert: + that: groups.get('tempest_runner', []) | length == 1 + fail_msg: The tempest_runner group should contain exactly one host + - name: Create a temporary directory for tests repo ansible.builtin.tempfile: state: directory @@ -45,7 +52,6 @@ - name: Ensure required individual Python packages are installed ansible.builtin.pip: name: - - "{{ repo_tmpdir.path }}" - pytest-html - pytest-timeout virtualenv: "{{ sct_venv }}" @@ -60,38 +66,125 @@ file: "{{ kayobe_env_config_path }}/kolla/passwords.yml" name: kolla_passwords - - name: Run StackHPC Cloud tests + # Monitoring tests should run once, executed on the host in the + # tempest_runner group. + - name: Check for StackHPC Cloud monitoring tests + ansible.builtin.stat: + path: "{{ repo_tmpdir.path }}/stackhpc_cloud_tests/monitoring" + register: stackhpc_cloud_monitoring_tests + + - name: Run StackHPC Cloud monitoring tests ansible.builtin.command: cmd: > {{ sct_venv }}/bin/py.test - --html={{ results_tmpdir.path }}/stackhpc-cloud-tests.html + --html={{ results_tmpdir.path }}/monitoring.html --self-contained-html - --pyargs stackhpc_cloud_tests --timeout {{ sct_timeout }} -rfEx -vv + "{{ repo_tmpdir.path }}/stackhpc_cloud_tests/monitoring" environment: + GRAFANA_URL: "{{ sct_grafana_url }}" + GRAFANA_USERNAME: "{{ sct_grafana_username }}" + GRAFANA_PASSWORD: "{{ sct_grafana_password }}" OPENSEARCH_HOSTS: "{{ sct_opensearch_hosts }}" OPENSEARCH_PORT: "{{ sct_opensearch_port }}" OPENSEARCH_TLS: "{{ sct_opensearch_tls }}" + OPENSEARCH_DASHBOARDS_URL: "{{ sct_opensearch_dashboards_url }}" + OPENSEARCH_DASHBOARDS_USERNAME: "{{ sct_opensearch_dashboards_username }}" + OPENSEARCH_DASHBOARDS_PASSWORD: "{{ sct_opensearch_dashboards_password }}" PROMETHEUS_URL: "{{ sct_prometheus_url }}" PROMETHEUS_USERNAME: "{{ sct_prometheus_username }}" PROMETHEUS_PASSWORD: "{{ sct_prometheus_password }}" vars: kolla_external_scheme: "{{ 'https' if kolla_enable_tls_external | bool else 'http' }}" kolla_internal_scheme: "{{ 'https' if kolla_enable_tls_internal | bool else 'http' }}" + sct_grafana_url: "{{ kolla_external_scheme }}://{{ kolla_external_fqdn }}:3000" + sct_grafana_username: "grafana_local_admin" + sct_grafana_password: "{{ kolla_passwords.grafana_admin_password }}" sct_opensearch_hosts: "{{ kolla_internal_fqdn }}" sct_opensearch_port: 9200 - sct_opensearch_tls: false + sct_opensearch_tls: "{{ kolla_enable_tls_internal | bool }}" + sct_opensearch_dashboards_url: "{{ kolla_external_scheme }}://{{ kolla_external_fqdn }}:5601" + sct_opensearch_dashboards_username: "opensearch" + sct_opensearch_dashboards_password: "{{ kolla_passwords.opensearch_dashboards_password }}" sct_prometheus_url: "{{ kolla_internal_scheme }}://{{ kolla_internal_fqdn }}:9091" sct_prometheus_username: admin sct_prometheus_password: "{{ kolla_passwords.prometheus_password }}" + failed_when: monitoring_results.rc not in [0, 1] + register: monitoring_results + when: "'tempest_runner' in group_names and stackhpc_cloud_monitoring_tests.stat.exists" + + # Host tests should run on every host in the overcloud group. + # TODO: Use TestInfra's native Ansible or SSH connection plugins for + # remote test execution? That would place all results in a single file + # and allow us to execute all tests from a single host. + # https://testinfra.readthedocs.io/en/latest/backends.html#connection-backends + - name: Check for StackHPC Cloud host tests + ansible.builtin.stat: + path: "{{ repo_tmpdir.path }}/stackhpc_cloud_tests/host" + register: stackhpc_cloud_host_tests + + - name: Run StackHPC Cloud host tests + ansible.builtin.command: + cmd: > + {{ sct_venv }}/bin/py.test + --html={{ results_tmpdir.path }}/host-{{ inventory_hostname }}.html + --self-contained-html + --timeout {{ sct_timeout }} + -vv + "{{ repo_tmpdir.path }}/stackhpc_cloud_tests/host" + environment: + DOCKER_VERSION_MIN: "{{ sct_docker_version_min }}" + DOCKER_VERSION_MAX: "{{ sct_docker_version_max }}" + SELINUX_STATE: "{{ sct_selinux_state }}" + vars: + # Inclusive min + sct_docker_version_min: "24.0.0" + # Exclusive max + sct_docker_version_max: "28.0.0" + sct_selinux_state: "{{ selinux_state }}" + failed_when: host_results.rc not in [0, 1] + register: host_results + # Some host checks may need to run as root + become: true + when: "'overcloud' in group_names and stackhpc_cloud_host_tests.stat.exists" + + # Host test results will be owned by root - we need to read and delete them + - name: Change permissions on SCT host test results + ansible.builtin.command: + cmd: chmod 666 {{ results_tmpdir.path }}/host-{{ inventory_hostname }}.html + become: true + when: "'overcloud' in group_names and stackhpc_cloud_host_tests.stat.exists" + always: - - name: Fetch results - ansible.builtin.fetch: - src: "{{ results_tmpdir.path }}/stackhpc-cloud-tests.html" + - name: Synchronize results + ansible.posix.synchronize: + src: "{{ results_tmpdir.path }}/" dest: "{{ results_path_local }}/" - flat: true + mode: pull + archive: no + recursive: true + # For jump host + use_ssh_args: true + + - name: Write a file containing failed test runs + ansible.builtin.copy: + content: |- + {% for host in ansible_play_hosts_all %} + {% if host not in ansible_play_hosts %} + {{ host }}: Host failure + {% endif %} + {% if hostvars[host].monitoring_results.rc | default(0) != 0 %} + monitoring.html + {% endif %} + {% if hostvars[host].host_results.rc | default(0) != 0 %} + host-{{ host }}.html + {% endif %} + {% endfor %} + dest: "{{ results_path_local }}/failed-tests" + delegate_to: localhost + run_once: true - name: Clean up temporary directory ansible.builtin.file: @@ -100,3 +193,5 @@ loop: - "{{ repo_tmpdir.path }}" - "{{ results_tmpdir.path }}" + # Some files used by host tests may now be owned by root + become: true diff --git a/etc/kayobe/inventory/group_vars/all/selinux b/etc/kayobe/inventory/group_vars/all/selinux new file mode 100644 index 000000000..80d084d7e --- /dev/null +++ b/etc/kayobe/inventory/group_vars/all/selinux @@ -0,0 +1,5 @@ +--- +# Target SELinux state +# NOTE(MaxN) In StackHPC cloud tests we're checking the host's SELinux state matches the targeted state +# but we can't access what was defined upstream so we redefine here - this must follow any upstream change. +selinux_state: permissive