From c3c3c67a9a8687e18366f33b214d1c1e4966a9e3 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 25 Nov 2024 14:27:28 -0700 Subject: [PATCH 1/7] test: add option to keep instance when test fails --- tests/integration_tests/clouds.py | 1 + tests/integration_tests/conftest.py | 8 +++++++- tests/integration_tests/integration_settings.py | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests/clouds.py b/tests/integration_tests/clouds.py index f7dc1463f6c..8a52d7d7f57 100644 --- a/tests/integration_tests/clouds.py +++ b/tests/integration_tests/clouds.py @@ -66,6 +66,7 @@ def __init__( self.cloud_instance = self._get_cloud_instance() self.initial_image_id = self._get_initial_image() self.snapshot_id = None + self.test_failed = False @property def image_id(self): diff --git a/tests/integration_tests/conftest.py b/tests/integration_tests/conftest.py index b62dae82af4..db797de1d0b 100644 --- a/tests/integration_tests/conftest.py +++ b/tests/integration_tests/conftest.py @@ -92,7 +92,12 @@ def session_cloud() -> Generator[IntegrationCloud, None, None]: cloud = platforms[integration_settings.PLATFORM](image_type=image_type) cloud.emit_settings_to_log() yield cloud - cloud.destroy() + + if not integration_settings.KEEP_INSTANCE or ( + integration_settings.KEEP_INSTANCE == "ON_ERROR" + and not cloud.test_failed + ): + cloud.destroy() def get_validated_source( @@ -323,6 +328,7 @@ def _client( yield instance test_failed = request.session.testsfailed - previous_failures > 0 _collect_artifacts(instance, request.node.nodeid, test_failed) + session_cloud.test_failed = test_failed # conflicting requirements: # - pytest thinks that it can cleanup loggers after tests run # - pycloudlib thinks that at garbage collection is a good place to tear diff --git a/tests/integration_tests/integration_settings.py b/tests/integration_tests/integration_settings.py index fb736abf5f7..3ed8fd0d9e3 100644 --- a/tests/integration_tests/integration_settings.py +++ b/tests/integration_tests/integration_settings.py @@ -9,6 +9,7 @@ ################################################################## # Keep instance (mostly for debugging) when test is finished +# set to "ON_ERROR" to only keep an instance when tests fail KEEP_INSTANCE = False # Keep snapshot image (mostly for debugging) when test is finished KEEP_IMAGE = False From 9f8ed0bbacfa515c858316a083e2eb14a6dafd4d Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 25 Nov 2024 14:28:26 -0700 Subject: [PATCH 2/7] test: make integration tests more robust to failure Retry when instance restart fails. Throw exception if too many failures occur. Retry when instance destory fails. Log error if too many failures occur. --- tests/integration_tests/instances.py | 30 +++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tests/integration_tests/instances.py b/tests/integration_tests/instances.py index a5ce8f1e14f..56cea0de26e 100644 --- a/tests/integration_tests/instances.py +++ b/tests/integration_tests/instances.py @@ -2,6 +2,7 @@ import logging import os import re +import time import uuid from enum import Enum from pathlib import Path @@ -70,10 +71,20 @@ def __init__( self._ip = "" def destroy(self): + wait = True if isinstance(self.instance, GceInstance): - self.instance.delete(wait=False) + wait = False + for i in range(1, 32): + try: + self.instance.delete(wait=wait) + break + except RuntimeError: + log.warning("Failed to clean instance, retrying.") + time.sleep(i) else: - self.instance.delete() + # This is just a cleanup mechanism. If we fail to clean the + # instance that doesn't mean that the test has actually failed. + log.error("Failed to clean instance") def restart(self): """Restart this instance (via cloud mechanism) and wait for boot. @@ -81,7 +92,20 @@ def restart(self): This wraps pycloudlib's `BaseInstance.restart` """ log.info("Restarting instance and waiting for boot") - self.instance.restart() + last_exception = None + for i in range(1, 32): + try: + self.instance.restart() + break + except RuntimeError as e: + last_exception = e + log.warning("Failed to restart instance.") + time.sleep(i) + else: + # this is a requirement for tests, so we must fail to prevent + # running tests in an unexpected state + log.error("Failed to restart instance.") + raise last_exception def execute(self, command, *, use_sudo=True) -> Result: if self.instance.username == "root" and use_sudo is False: From 568b317361a353d7c3c9152f9fdc1452077f9911 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 25 Nov 2024 18:13:06 -0700 Subject: [PATCH 3/7] fixup! test: add option to keep instance when test fails --- tests/integration_tests/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/conftest.py b/tests/integration_tests/conftest.py index db797de1d0b..7401b963cc4 100644 --- a/tests/integration_tests/conftest.py +++ b/tests/integration_tests/conftest.py @@ -94,8 +94,8 @@ def session_cloud() -> Generator[IntegrationCloud, None, None]: yield cloud if not integration_settings.KEEP_INSTANCE or ( - integration_settings.KEEP_INSTANCE == "ON_ERROR" - and not cloud.test_failed + not cloud.test_failed and + integration_settings.KEEP_INSTANCE == "ON_ERROR" # type: ignore ): cloud.destroy() From d44c77ff5295d42f8d477ea5087421364c72526b Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Tue, 26 Nov 2024 07:27:22 -0700 Subject: [PATCH 4/7] fixup! test: add option to keep instance when test fails --- tests/integration_tests/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/conftest.py b/tests/integration_tests/conftest.py index 7401b963cc4..bbcc3f6330d 100644 --- a/tests/integration_tests/conftest.py +++ b/tests/integration_tests/conftest.py @@ -94,8 +94,8 @@ def session_cloud() -> Generator[IntegrationCloud, None, None]: yield cloud if not integration_settings.KEEP_INSTANCE or ( - not cloud.test_failed and - integration_settings.KEEP_INSTANCE == "ON_ERROR" # type: ignore + not cloud.test_failed + and integration_settings.KEEP_INSTANCE == "ON_ERROR" ): cloud.destroy() From 9b18a3d571ebfff3a993a92c3b055bad980dd9b5 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Tue, 26 Nov 2024 07:34:35 -0700 Subject: [PATCH 5/7] fixup! test: add option to keep instance when test fails --- tests/integration_tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_tests/conftest.py b/tests/integration_tests/conftest.py index bbcc3f6330d..8aeff854a8d 100644 --- a/tests/integration_tests/conftest.py +++ b/tests/integration_tests/conftest.py @@ -94,7 +94,7 @@ def session_cloud() -> Generator[IntegrationCloud, None, None]: yield cloud if not integration_settings.KEEP_INSTANCE or ( - not cloud.test_failed + cloud.test_failed and integration_settings.KEEP_INSTANCE == "ON_ERROR" ): cloud.destroy() From 872d19a05895dd83dc17c59670d7e8c9f481af84 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Tue, 26 Nov 2024 07:36:11 -0700 Subject: [PATCH 6/7] fixup! test: add option to keep instance when test fails --- tests/integration_tests/conftest.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration_tests/conftest.py b/tests/integration_tests/conftest.py index 8aeff854a8d..4c14c4a306b 100644 --- a/tests/integration_tests/conftest.py +++ b/tests/integration_tests/conftest.py @@ -94,8 +94,7 @@ def session_cloud() -> Generator[IntegrationCloud, None, None]: yield cloud if not integration_settings.KEEP_INSTANCE or ( - cloud.test_failed - and integration_settings.KEEP_INSTANCE == "ON_ERROR" + cloud.test_failed and integration_settings.KEEP_INSTANCE == "ON_ERROR" ): cloud.destroy() From cef2710a6bd44e214f9ede841a8a34a1b55872ae Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 27 Nov 2024 12:23:45 -0700 Subject: [PATCH 7/7] fixup! test: add option to keep instance when test fails --- tests/integration_tests/clouds.py | 7 +++++++ tests/integration_tests/conftest.py | 7 ++----- tests/integration_tests/instances.py | 13 ++++++++++--- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/tests/integration_tests/clouds.py b/tests/integration_tests/clouds.py index 8a52d7d7f57..64417b5fc17 100644 --- a/tests/integration_tests/clouds.py +++ b/tests/integration_tests/clouds.py @@ -175,6 +175,13 @@ def destroy(self): "NOT cleaning cloud instance because KEEP_IMAGE or " "KEEP_INSTANCE is True" ) + elif ( + integration_settings.KEEP_IMAGE == "ON_ERROR" and self.test_failed + ): + log.info( + 'NOT cleaning cloud instance because KEEP_IMAGE="ON_ERROR" and' + "test failed" + ) else: self.cloud_instance.clean() diff --git a/tests/integration_tests/conftest.py b/tests/integration_tests/conftest.py index 4c14c4a306b..16a123212b0 100644 --- a/tests/integration_tests/conftest.py +++ b/tests/integration_tests/conftest.py @@ -92,11 +92,7 @@ def session_cloud() -> Generator[IntegrationCloud, None, None]: cloud = platforms[integration_settings.PLATFORM](image_type=image_type) cloud.emit_settings_to_log() yield cloud - - if not integration_settings.KEEP_INSTANCE or ( - cloud.test_failed and integration_settings.KEEP_INSTANCE == "ON_ERROR" - ): - cloud.destroy() + cloud.destroy() def get_validated_source( @@ -327,6 +323,7 @@ def _client( yield instance test_failed = request.session.testsfailed - previous_failures > 0 _collect_artifacts(instance, request.node.nodeid, test_failed) + instance.test_failed = test_failed session_cloud.test_failed = test_failed # conflicting requirements: # - pytest thinks that it can cleanup loggers after tests run diff --git a/tests/integration_tests/instances.py b/tests/integration_tests/instances.py index 56cea0de26e..04201447ead 100644 --- a/tests/integration_tests/instances.py +++ b/tests/integration_tests/instances.py @@ -68,6 +68,7 @@ def __init__( self.cloud = cloud self.instance = instance self.settings = settings + self.test_failed = False self._ip = "" def destroy(self): @@ -348,7 +349,13 @@ def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): - if not self.settings.KEEP_INSTANCE: - self.destroy() - else: + + if self.settings.KEEP_INSTANCE: log.info("Keeping Instance, public ip: %s", self.ip()) + elif ( + integration_settings.KEEP_INSTANCE == "ON_ERROR" + and self.test_failed + ): + log.info("Keeping Instance (test failed) public ip: %s", self.ip()) + else: + self.destroy()