Skip to content

Commit

Permalink
Incorporate separate StackHPC cloud tests for monitoring and for hosts (#1501)
Browse files Browse the repository at this point in the history

CI: Add Grafana and OpenSearch Dashboards variables for SOT

Depends-On: stackhpc/stackhpc-cloud-tests#3
Depends-On: stackhpc/stackhpc-cloud-tests#4

Co-authored-by: Mark Goddard <[email protected]>
  • Loading branch information
maxstack and markgoddard authored Feb 18, 2025
1 parent cd0d44e commit f6cd436
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 20 deletions.
24 changes: 14 additions & 10 deletions .github/workflows/stackhpc-all-in-one.yml
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ jobs:
-v $(pwd)/sct-results:/stack/sct-results \
-e KAYOBE_ENVIRONMENT -e KAYOBE_VAULT_PASSWORD -e KAYOBE_AUTOMATION_SSH_PRIVATE_KEY \
$KAYOBE_IMAGE \
/stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/playbook-run.sh '$KAYOBE_CONFIG_PATH/ansible/stackhpc-cloud-tests.yml' -e sot_version=${{ inputs.stackhpc_cloud_tests_version }}
/stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/playbook-run.sh '$KAYOBE_CONFIG_PATH/ansible/stackhpc-cloud-tests.yml' -e sct_version=${{ inputs.stackhpc_cloud_tests_version }}
env:
KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }}

Expand Down Expand Up @@ -496,16 +496,20 @@ jobs:
sct-results/
if: ${{ !cancelled() && (steps.tempest.outcome == 'success' || steps.stackhpc-cloud-tests.outcome == 'success' || steps.diagnostics.outcome == 'success') }}

- name: Fail if any Tempest tests failed
- name: Fail if any tests failed
run: |
test $(wc -l < tempest-artifacts/failed-tests) -lt 1
- name: Fail if any StackHPC Cloud tests failed
run: |
echo "Some StackHPC Cloud tests failed."
echo "See HTML results artifact (sct-results) for details."
exit 1
if: steps.stackhpc-cloud-tests.outcome == 'failure'
# Aggregate check: inspect both suites and print a message for each one that
# recorded failures, then exit once with the combined status.
# NOTE(review): if a failed-tests file is missing, the command substitution is
# empty and -ne evaluates it as 0, so that suite is treated as having no
# failures - confirm this is the intended behaviour.
rc=0
# A non-zero line count in failed-tests means the Tempest run recorded failures.
if [[ $(wc -l < tempest-artifacts/failed-tests) -ne 0 ]]; then
echo "Some Tempest tests failed."
echo "See HTML results artifact (tempest-artifacts) for details."
rc=1
fi
# Same check for the StackHPC Cloud tests results directory.
if [[ $(wc -l < sct-results/failed-tests) -ne 0 ]]; then
echo "Some StackHPC Cloud tests failed."
echo "See HTML results artifact (sct-results) for details."
rc=1
fi
# rc is 1 if either suite reported failures, otherwise 0.
exit $rc
- name: Destroy
run: terraform destroy -auto-approve
Expand Down
115 changes: 105 additions & 10 deletions etc/kayobe/ansible/stackhpc-cloud-tests.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
---
- name: Run StackHPC Cloud tests
hosts: tempest_runner
hosts: tempest_runner:overcloud
tags:
- stackhpc-cloud-tests
vars:
sct_venv: "{{ virtualenv_path }}/sct-venv"
sct_repo: https://github.com/stackhpc/stackhpc-cloud-tests
# Define the version of SCT used for testing. The GitHub workflow overrides this with
# stackhpc_cloud_tests_version, so this is only used if running "locally".
sct_version: main
sct_timeout: 30
results_path_local: "{{ lookup('env', 'HOME') }}/sct-results"
tasks:
- name: Stackhpc Cloud tests
block:
- name: Assert that there is only one host in the tempest_runner group
ansible.builtin.assert:
that: groups.get('tempest_runner', []) | length == 1
fail_msg: The tempest_runner group should contain exactly one host

- name: Create a temporary directory for tests repo
ansible.builtin.tempfile:
state: directory
Expand Down Expand Up @@ -45,7 +52,6 @@
- name: Ensure required individual Python packages are installed
ansible.builtin.pip:
name:
- "{{ repo_tmpdir.path }}"
- pytest-html
- pytest-timeout
virtualenv: "{{ sct_venv }}"
Expand All @@ -60,38 +66,125 @@
file: "{{ kayobe_env_config_path }}/kolla/passwords.yml"
name: kolla_passwords

- name: Run StackHPC Cloud tests
# Monitoring tests should run once, executed on the host in the
# tempest_runner group.
- name: Check for StackHPC Cloud monitoring tests
ansible.builtin.stat:
path: "{{ repo_tmpdir.path }}/stackhpc_cloud_tests/monitoring"
register: stackhpc_cloud_monitoring_tests

- name: Run StackHPC Cloud monitoring tests
ansible.builtin.command:
cmd: >
{{ sct_venv }}/bin/py.test
--html={{ results_tmpdir.path }}/stackhpc-cloud-tests.html
--html={{ results_tmpdir.path }}/monitoring.html
--self-contained-html
--pyargs stackhpc_cloud_tests
--timeout {{ sct_timeout }}
-rfEx
-vv
"{{ repo_tmpdir.path }}/stackhpc_cloud_tests/monitoring"
environment:
GRAFANA_URL: "{{ sct_grafana_url }}"
GRAFANA_USERNAME: "{{ sct_grafana_username }}"
GRAFANA_PASSWORD: "{{ sct_grafana_password }}"
OPENSEARCH_HOSTS: "{{ sct_opensearch_hosts }}"
OPENSEARCH_PORT: "{{ sct_opensearch_port }}"
OPENSEARCH_TLS: "{{ sct_opensearch_tls }}"
OPENSEARCH_DASHBOARDS_URL: "{{ sct_opensearch_dashboards_url }}"
OPENSEARCH_DASHBOARDS_USERNAME: "{{ sct_opensearch_dashboards_username }}"
OPENSEARCH_DASHBOARDS_PASSWORD: "{{ sct_opensearch_dashboards_password }}"
PROMETHEUS_URL: "{{ sct_prometheus_url }}"
PROMETHEUS_USERNAME: "{{ sct_prometheus_username }}"
PROMETHEUS_PASSWORD: "{{ sct_prometheus_password }}"
vars:
kolla_external_scheme: "{{ 'https' if kolla_enable_tls_external | bool else 'http' }}"
kolla_internal_scheme: "{{ 'https' if kolla_enable_tls_internal | bool else 'http' }}"
sct_grafana_url: "{{ kolla_external_scheme }}://{{ kolla_external_fqdn }}:3000"
sct_grafana_username: "grafana_local_admin"
sct_grafana_password: "{{ kolla_passwords.grafana_admin_password }}"
sct_opensearch_hosts: "{{ kolla_internal_fqdn }}"
sct_opensearch_port: 9200
sct_opensearch_tls: false
sct_opensearch_tls: "{{ kolla_enable_tls_internal | bool }}"
sct_opensearch_dashboards_url: "{{ kolla_external_scheme }}://{{ kolla_external_fqdn }}:5601"
sct_opensearch_dashboards_username: "opensearch"
sct_opensearch_dashboards_password: "{{ kolla_passwords.opensearch_dashboards_password }}"
sct_prometheus_url: "{{ kolla_internal_scheme }}://{{ kolla_internal_fqdn }}:9091"
sct_prometheus_username: admin
sct_prometheus_password: "{{ kolla_passwords.prometheus_password }}"
failed_when: monitoring_results.rc not in [0, 1]
register: monitoring_results
when: "'tempest_runner' in group_names and stackhpc_cloud_monitoring_tests.stat.exists"

# Host tests should run on every host in the overcloud group.
# TODO: Use TestInfra's native Ansible or SSH connection plugins for
# remote test execution? That would place all results in a single file
# and allow us to execute all tests from a single host.
# https://testinfra.readthedocs.io/en/latest/backends.html#connection-backends
- name: Check for StackHPC Cloud host tests
ansible.builtin.stat:
path: "{{ repo_tmpdir.path }}/stackhpc_cloud_tests/host"
register: stackhpc_cloud_host_tests

- name: Run StackHPC Cloud host tests
ansible.builtin.command:
cmd: >
{{ sct_venv }}/bin/py.test
--html={{ results_tmpdir.path }}/host-{{ inventory_hostname }}.html
--self-contained-html
--timeout {{ sct_timeout }}
-vv
"{{ repo_tmpdir.path }}/stackhpc_cloud_tests/host"
environment:
DOCKER_VERSION_MIN: "{{ sct_docker_version_min }}"
DOCKER_VERSION_MAX: "{{ sct_docker_version_max }}"
SELINUX_STATE: "{{ sct_selinux_state }}"
vars:
# Inclusive min
sct_docker_version_min: "24.0.0"
# Exclusive max
sct_docker_version_max: "28.0.0"
sct_selinux_state: "{{ selinux_state }}"
failed_when: host_results.rc not in [0, 1]
register: host_results
# Some host checks may need to run as root
become: true
when: "'overcloud' in group_names and stackhpc_cloud_host_tests.stat.exists"

# Host test results will be owned by root - we need to read and delete them
- name: Change permissions on SCT host test results
ansible.builtin.command:
cmd: chmod 666 {{ results_tmpdir.path }}/host-{{ inventory_hostname }}.html
become: true
when: "'overcloud' in group_names and stackhpc_cloud_host_tests.stat.exists"

always:
- name: Fetch results
ansible.builtin.fetch:
src: "{{ results_tmpdir.path }}/stackhpc-cloud-tests.html"
- name: Synchronize results
ansible.posix.synchronize:
src: "{{ results_tmpdir.path }}/"
dest: "{{ results_path_local }}/"
flat: true
mode: pull
archive: no
recursive: true
# For jump host
use_ssh_args: true

# Summarise failures in a single "failed-tests" file under results_path_local.
# For every host in ansible_play_hosts_all, one line is written per problem:
#   "<host>: Host failure"  - the host is no longer in ansible_play_hosts
#   "monitoring.html"       - the host's monitoring_results.rc is non-zero
#   "host-<host>.html"      - the host's host_results.rc is non-zero
# default(0) means a host with no rc for a test run contributes no line for it.
# The file is written once, on localhost.
- name: Write a file containing failed test runs
ansible.builtin.copy:
content: |-
{% for host in ansible_play_hosts_all %}
{% if host not in ansible_play_hosts %}
{{ host }}: Host failure
{% endif %}
{% if hostvars[host].monitoring_results.rc | default(0) != 0 %}
monitoring.html
{% endif %}
{% if hostvars[host].host_results.rc | default(0) != 0 %}
host-{{ host }}.html
{% endif %}
{% endfor %}
dest: "{{ results_path_local }}/failed-tests"
delegate_to: localhost
run_once: true

- name: Clean up temporary directory
ansible.builtin.file:
Expand All @@ -100,3 +193,5 @@
loop:
- "{{ repo_tmpdir.path }}"
- "{{ results_tmpdir.path }}"
# Some files used by host tests may now be owned by root
become: true
5 changes: 5 additions & 0 deletions etc/kayobe/inventory/group_vars/all/selinux
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
# Target SELinux state
# NOTE(MaxN): The StackHPC cloud tests check that the host's SELinux state matches the target state.
# We can't access the value defined upstream, so we redefine it here - this must follow any upstream change.
selinux_state: permissive

0 comments on commit f6cd436

Please sign in to comment.