Skip to content

Commit

Permalink
Chore: Use the latest stack event timestamp as a marker to get the ne…
Browse files Browse the repository at this point in the history
…w deployment events (#5852)

* add more logs to detect if teh customer machine has problems in time in the future

* update the wait_for_execute method

* clean the code

* fix typo
  • Loading branch information
moelasmar authored Oct 24, 2023
1 parent e4ef6e9 commit 572ca1d
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 15 deletions.
5 changes: 4 additions & 1 deletion samcli/commands/deploy/deploy_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,11 @@ def deploy(
if not click.confirm(f"{self.MSG_CONFIRM_CHANGESET}", default=False):
return

marker_time = self.deployer.get_last_event_time(stack_name, 0)
self.deployer.execute_changeset(result["Id"], stack_name, disable_rollback)
self.deployer.wait_for_execute(stack_name, changeset_type, disable_rollback, self.on_failure)
self.deployer.wait_for_execute(
stack_name, changeset_type, disable_rollback, self.on_failure, marker_time
)
click.echo(self.MSG_EXECUTE_SUCCESS.format(stack_name=stack_name, region=region))

except deploy_exceptions.ChangeEmptyError as ex:
Expand Down
48 changes: 38 additions & 10 deletions samcli/lib/deploy/deployer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from samcli.lib.package.s3_uploader import S3Uploader
from samcli.lib.utils.colors import Colored, Colors
from samcli.lib.utils.s3 import parse_s3_url
from samcli.lib.utils.time import utc_to_timestamp
from samcli.lib.utils.time import to_datetime, utc_to_timestamp

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -347,18 +347,19 @@ def execute_changeset(self, changeset_id, stack_name, disable_rollback):
except botocore.exceptions.ClientError as ex:
raise DeployFailedError(stack_name=stack_name, msg=str(ex)) from ex

def get_last_event_time(self, stack_name):
def get_last_event_time(self, stack_name, default_time=time.time()):
"""
Finds the last event time stamp thats present for the stack, if not get the current time
Finds the last event time stamp that presents for the stack, if not return the default_time
:param stack_name: Name or ID of the stack
:param default_time: the default unix epoch time to be returned in case if the stack provided does not exist
:return: unix epoch
"""
try:
return utc_to_timestamp(
self._client.describe_stack_events(StackName=stack_name)["StackEvents"][0]["Timestamp"]
)
except KeyError:
return time.time()
return default_time

@pprint_column_names(
format_string=DESCRIBE_STACK_EVENTS_FORMAT_STRING,
Expand All @@ -384,6 +385,7 @@ def describe_stack_events(
while stack_change_in_progress and retry_attempts <= self.max_attempts:
try:
# Only sleep if there have been no retry_attempts
LOG.debug("Trial # %d to get the stack %s create events", retry_attempts, stack_name)
time.sleep(0 if retry_attempts else self.client_sleep)
paginator = self._client.get_paginator("describe_stack_events")
response_iterator = paginator.paginate(StackName=stack_name)
Expand All @@ -393,18 +395,25 @@ def describe_stack_events(

for event_items in response_iterator:
for event in event_items["StackEvents"]:
LOG.debug("Stack Event: %s", event)
# Skip already shown old event entries or former deployments
if utc_to_timestamp(event["Timestamp"]) <= time_stamp_marker:
LOG.debug(
"Skip previous event as time_stamp_marker: %s is after the event time stamp: %s",
to_datetime(time_stamp_marker),
event["Timestamp"],
)
break
if event["EventId"] not in events:
events.add(event["EventId"])
# Events are in reverse chronological order
# Pushing in front reverse the order to display older events first
new_events.appendleft(event)
else: # go to next loop (page of events) if not break from inside loop
LOG.debug("Still in describe_stack_events loop, got to next page")
continue
break # reached here only if break from inner loop!

LOG.debug("Exit from the describe event loop")
# Override timestamp marker with latest event (last in deque)
if len(new_events) > 0:
time_stamp_marker = utc_to_timestamp(new_events[-1]["Timestamp"])
Expand All @@ -429,6 +438,12 @@ def describe_stack_events(
if self._is_root_stack_event(new_event) and self._check_stack_not_in_progress(
new_event["ResourceStatus"]
):
LOG.debug(
"Stack %s is not in progress. Its status is %s, and event is %s",
stack_name,
new_event["ResourceStatus"],
new_event,
)
stack_change_in_progress = False
break

Expand All @@ -439,8 +454,11 @@ def describe_stack_events(
"Stack with id {0} does not exist".format(stack_name) in str(ex)
and on_failure == FailureMode.DELETE
):
LOG.debug("Stack %s does not exist", stack_name)
return

LOG.debug("Trial # %d failed due to exception %s", retry_attempts, str(ex))

retry_attempts = retry_attempts + 1
if retry_attempts > self.max_attempts:
LOG.error("Describing stack events for %s failed: %s", stack_name, str(ex))
Expand All @@ -466,6 +484,7 @@ def wait_for_execute(
stack_operation: str,
disable_rollback: bool,
on_failure: FailureMode = FailureMode.ROLLBACK,
time_stamp_marker: float = 0,
) -> None:
"""
Wait for stack operation to execute and return when execution completes.
Expand All @@ -481,14 +500,16 @@ def wait_for_execute(
Preserves the state of previously provisioned resources when an operation fails
on_failure : FailureMode
The action to take when the operation fails
time_stamp_marker:
last event time on the stack to start streaming events from.
"""
sys.stdout.write(
"\n{} - Waiting for stack create/update "
"to complete\n".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
)
sys.stdout.flush()

self.describe_stack_events(stack_name, time.time() * 1000, on_failure)
self.describe_stack_events(stack_name, time_stamp_marker, on_failure)

# Pick the right waiter
if stack_operation == "CREATE":
Expand Down Expand Up @@ -624,9 +645,12 @@ def sync(

if exists:
kwargs["DisableRollback"] = disable_rollback # type: ignore

# get the latest stack event, and use 0 in case if the stack does not exist
marker_time = self.get_last_event_time(stack_name, 0)
result = self.update_stack(**kwargs)
self.wait_for_execute(stack_name, "UPDATE", disable_rollback, on_failure=on_failure)
self.wait_for_execute(
stack_name, "UPDATE", disable_rollback, on_failure=on_failure, time_stamp_marker=marker_time
)
msg = "\nStack update succeeded. Sync infra completed.\n"
else:
# Pass string representation of enum
Expand Down Expand Up @@ -701,8 +725,10 @@ def rollback_delete_stack(self, stack_name: str):
if current_state == "UPDATE_FAILED":
LOG.info("Stack %s failed to update, rolling back stack to previous state...", stack_name)

# get the latest stack event
marker_time = self.get_last_event_time(stack_name, 0)
self._client.rollback_stack(**kwargs)
self.describe_stack_events(stack_name, time.time() * 1000, FailureMode.DELETE)
self.describe_stack_events(stack_name, marker_time, FailureMode.DELETE)
self._rollback_wait(stack_name)

current_state = self._get_stack_status(stack_name)
Expand All @@ -712,13 +738,15 @@ def rollback_delete_stack(self, stack_name: str):
if current_state in failed_states:
LOG.info("Stack %s failed to create/update correctly, deleting stack", stack_name)

# get the latest stack event
marker_time = self.get_last_event_time(stack_name, 0)
self._client.delete_stack(**kwargs)

# only a stack that failed to create will have stack events, deleting
# from a ROLLBACK_COMPLETE state will not return anything
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudformation.html#CloudFormation.Client.delete_stack
if current_state == "CREATE_FAILED":
self.describe_stack_events(stack_name, time.time() * 1000, FailureMode.DELETE)
self.describe_stack_events(stack_name, marker_time, FailureMode.DELETE)

waiter = self._client.get_waiter("stack_delete_complete")
waiter.wait(StackName=stack_name, WaiterConfig={"Delay": 30, "MaxAttempts": 120})
Expand Down
3 changes: 2 additions & 1 deletion tests/unit/commands/deploy/test_deploy_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ def test_on_failure_delete_rollback_stack(self, mock_client, mock_session):
@patch.object(Deployer, "execute_changeset", MagicMock())
@patch.object(Deployer, "wait_for_execute", MagicMock())
@patch.object(Deployer, "create_and_wait_for_changeset", MagicMock(return_value=({"Id": "test"}, "CREATE")))
@patch.object(Deployer, "get_last_event_time", MagicMock(return_value=1000))
def test_on_failure_do_nothing(self, mock_session, mock_client):
with tempfile.NamedTemporaryFile(delete=False) as template_file:
template_file.write(b"{}")
Expand All @@ -268,5 +269,5 @@ def test_on_failure_do_nothing(self, mock_session, mock_client):
self.deploy_command_context.run()

self.deploy_command_context.deployer.wait_for_execute.assert_called_with(
ANY, "CREATE", False, FailureMode.DO_NOTHING
ANY, "CREATE", False, FailureMode.DO_NOTHING, 1000
)
7 changes: 4 additions & 3 deletions tests/unit/lib/deploy/test_deployer.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,11 @@ def test_get_last_event_time(self):
self.assertEqual(self.deployer.get_last_event_time("test"), utc_to_timestamp(timestamp))

def test_get_last_event_time_unknown_last_time(self):
current_timestamp = datetime.utcnow()
time_float = time.time()
current_timestamp = to_datetime(time_float * 1000)
self.deployer._client.describe_stack_events = MagicMock(side_effect=KeyError)
# Convert to milliseconds from seconds
last_stack_event_timestamp = to_datetime(self.deployer.get_last_event_time("test") * 1000)
last_stack_event_timestamp = to_datetime(self.deployer.get_last_event_time("test", time_float) * 1000)
self.assertEqual(last_stack_event_timestamp.year, current_timestamp.year)
self.assertEqual(last_stack_event_timestamp.month, current_timestamp.month)
self.assertEqual(last_stack_event_timestamp.day, current_timestamp.day)
Expand Down Expand Up @@ -1384,7 +1385,7 @@ def test_rollback_stack_update_stack_delete(self):

self.assertEqual(self.deployer._client.rollback_stack.call_count, 1)
self.assertEqual(self.deployer._client.delete_stack.call_count, 1)
self.assertEqual(self.deployer._client.describe_stack_events.call_count, 0)
self.assertEqual(self.deployer._client.describe_stack_events.call_count, 2)

def test_rollback_invalid_stack_name(self):
self.deployer._client.describe_stacks = MagicMock(
Expand Down

0 comments on commit 572ca1d

Please sign in to comment.