Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve test robustness and simplify debugging ephemeral failures #5901

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
8 changes: 8 additions & 0 deletions tests/integration_tests/clouds.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def __init__(
self.cloud_instance = self._get_cloud_instance()
self.initial_image_id = self._get_initial_image()
self.snapshot_id: Optional[str] = None
self.test_failed = False

@property
def image_id(self):
Expand Down Expand Up @@ -178,6 +179,13 @@ def destroy(self):
"NOT cleaning cloud instance because KEEP_IMAGE or "
"KEEP_INSTANCE is True"
)
elif (
integration_settings.KEEP_IMAGE == "ON_ERROR" and self.test_failed
):
log.info(
'NOT cleaning cloud instance because KEEP_IMAGE="ON_ERROR" and'
"test failed"
)
else:
self.cloud_instance.clean()

Expand Down
2 changes: 2 additions & 0 deletions tests/integration_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ def _client(
yield instance
test_failed = request.session.testsfailed - previous_failures > 0
_collect_artifacts(instance, request.node.nodeid, test_failed)
instance.test_failed = test_failed
session_cloud.test_failed = test_failed
holmanb marked this conversation as resolved.
Show resolved Hide resolved
# conflicting requirements:
# - pytest thinks that it can cleanup loggers after tests run
# - pycloudlib thinks that at garbage collection is a good place to
Expand Down
39 changes: 35 additions & 4 deletions tests/integration_tests/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import re
import time
import uuid
from enum import Enum
from pathlib import Path
Expand Down Expand Up @@ -67,21 +68,45 @@ def __init__(
self.cloud = cloud
self.instance = instance
self.settings = settings
self.test_failed = False
self._ip = ""

def destroy(self):
wait = True
if isinstance(self.instance, GceInstance):
self.instance.delete(wait=False)
wait = False
for i in range(1, 32):
holmanb marked this conversation as resolved.
Show resolved Hide resolved
try:
self.instance.delete(wait=wait)
break
except RuntimeError:
log.warning("Failed to clean instance, retrying.")
time.sleep(i)
else:
self.instance.delete()
# This is just a cleanup mechanism. If we fail to clean the
# instance that doesn't mean that the test has actually failed.
log.error("Failed to clean instance")

def restart(self):
"""Restart this instance (via cloud mechanism) and wait for boot.

This wraps pycloudlib's `BaseInstance.restart`
"""
log.info("Restarting instance and waiting for boot")
self.instance.restart()
last_exception = None
for i in range(1, 32):
try:
self.instance.restart()
break
except RuntimeError as e:
last_exception = e
log.warning("Failed to restart instance.")
time.sleep(i)
else:
# this is a requirement for tests, so we must fail to prevent
# running tests in an unexpected state
log.error("Failed to restart instance.")
raise last_exception

def execute(self, command, *, use_sudo=True) -> Result:
if self.instance.username == "root" and use_sudo is False:
Expand Down Expand Up @@ -329,7 +354,13 @@ def __enter__(self):
return self

def __exit__(self, exc_type, exc_val, exc_tb):

if not self.settings.KEEP_INSTANCE:
conftest.REAPER.reap(self)
elif (
integration_settings.KEEP_INSTANCE == "ON_ERROR"
and self.test_failed
):
log.info("Keeping Instance (test failed) public ip: %s", self.ip())
else:
log.info("Keeping Instance, public ip: %s", self.ip())
log.info("Keeping Instance public ip: %s", self.ip())
1 change: 1 addition & 0 deletions tests/integration_tests/integration_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
##################################################################

# Keep instance (mostly for debugging) when test is finished
# set to "ON_ERROR" to only keep an instance when tests fail
KEEP_INSTANCE = False
# Keep snapshot image (mostly for debugging) when test is finished
KEEP_IMAGE = False
Expand Down