From 572ca1d84157bdcae5160965bf61c0aef1e35aaa Mon Sep 17 00:00:00 2001 From: Mohamed Elasmar <71043312+moelasmar@users.noreply.github.com> Date: Tue, 24 Oct 2023 00:00:35 -0700 Subject: [PATCH] Chore: Use the latest stack event timestamp as a marker to get the new deployment events (#5852) * add more logs to detect if teh customer machine has problems in time in the future * update the wait_for_execute method * clean the code * fix typo --- samcli/commands/deploy/deploy_context.py | 5 +- samcli/lib/deploy/deployer.py | 48 +++++++++++++++---- .../commands/deploy/test_deploy_context.py | 3 +- tests/unit/lib/deploy/test_deployer.py | 7 +-- 4 files changed, 48 insertions(+), 15 deletions(-) diff --git a/samcli/commands/deploy/deploy_context.py b/samcli/commands/deploy/deploy_context.py index c088518337..ac21f5461e 100644 --- a/samcli/commands/deploy/deploy_context.py +++ b/samcli/commands/deploy/deploy_context.py @@ -267,8 +267,11 @@ def deploy( if not click.confirm(f"{self.MSG_CONFIRM_CHANGESET}", default=False): return + marker_time = self.deployer.get_last_event_time(stack_name, 0) self.deployer.execute_changeset(result["Id"], stack_name, disable_rollback) - self.deployer.wait_for_execute(stack_name, changeset_type, disable_rollback, self.on_failure) + self.deployer.wait_for_execute( + stack_name, changeset_type, disable_rollback, self.on_failure, marker_time + ) click.echo(self.MSG_EXECUTE_SUCCESS.format(stack_name=stack_name, region=region)) except deploy_exceptions.ChangeEmptyError as ex: diff --git a/samcli/lib/deploy/deployer.py b/samcli/lib/deploy/deployer.py index 58a2582403..0500a41901 100644 --- a/samcli/lib/deploy/deployer.py +++ b/samcli/lib/deploy/deployer.py @@ -39,7 +39,7 @@ from samcli.lib.package.s3_uploader import S3Uploader from samcli.lib.utils.colors import Colored, Colors from samcli.lib.utils.s3 import parse_s3_url -from samcli.lib.utils.time import utc_to_timestamp +from samcli.lib.utils.time import to_datetime, utc_to_timestamp LOG = logging.getLogger(__name__) @@ -347,10 +347,11 @@ def execute_changeset(self, changeset_id, stack_name, disable_rollback): except botocore.exceptions.ClientError as ex: raise DeployFailedError(stack_name=stack_name, msg=str(ex)) from ex - def get_last_event_time(self, stack_name): + def get_last_event_time(self, stack_name, default_time=time.time()): """ - Finds the last event time stamp thats present for the stack, if not get the current time + Finds the last event time stamp that presents for the stack, if not return the default_time :param stack_name: Name or ID of the stack + :param default_time: the default unix epoch time to be returned in case if the stack provided does not exist :return: unix epoch """ try: @@ -358,7 +359,7 @@ def get_last_event_time(self, stack_name): self._client.describe_stack_events(StackName=stack_name)["StackEvents"][0]["Timestamp"] ) except KeyError: - return time.time() + return default_time @pprint_column_names( format_string=DESCRIBE_STACK_EVENTS_FORMAT_STRING, @@ -384,6 +385,7 @@ def describe_stack_events( while stack_change_in_progress and retry_attempts <= self.max_attempts: try: # Only sleep if there have been no retry_attempts + LOG.debug("Trial # %d to get the stack %s create events", retry_attempts, stack_name) time.sleep(0 if retry_attempts else self.client_sleep) paginator = self._client.get_paginator("describe_stack_events") response_iterator = paginator.paginate(StackName=stack_name) @@ -393,8 +395,14 @@ def describe_stack_events( for event_items in response_iterator: for event in event_items["StackEvents"]: + LOG.debug("Stack Event: %s", event) # Skip already shown old event entries or former deployments if utc_to_timestamp(event["Timestamp"]) <= time_stamp_marker: + LOG.debug( + "Skip previous event as time_stamp_marker: %s is after the event time stamp: %s", + to_datetime(time_stamp_marker), + event["Timestamp"], + ) break if event["EventId"] not in events: events.add(event["EventId"]) @@ -402,9 +410,10 @@ def describe_stack_events( # Pushing in front reverse the order to display older events first new_events.appendleft(event) else: # go to next loop (page of events) if not break from inside loop + LOG.debug("Still in describe_stack_events loop, got to next page") continue break # reached here only if break from inner loop! - + LOG.debug("Exit from the describe event loop") # Override timestamp marker with latest event (last in deque) if len(new_events) > 0: time_stamp_marker = utc_to_timestamp(new_events[-1]["Timestamp"]) @@ -429,6 +438,12 @@ def describe_stack_events( if self._is_root_stack_event(new_event) and self._check_stack_not_in_progress( new_event["ResourceStatus"] ): + LOG.debug( + "Stack %s is not in progress. Its status is %s, and event is %s", + stack_name, + new_event["ResourceStatus"], + new_event, + ) stack_change_in_progress = False break @@ -439,8 +454,11 @@ def describe_stack_events( "Stack with id {0} does not exist".format(stack_name) in str(ex) and on_failure == FailureMode.DELETE ): + LOG.debug("Stack %s does not exist", stack_name) return + LOG.debug("Trial # %d failed due to exception %s", retry_attempts, str(ex)) + retry_attempts = retry_attempts + 1 if retry_attempts > self.max_attempts: LOG.error("Describing stack events for %s failed: %s", stack_name, str(ex)) @@ -466,6 +484,7 @@ def wait_for_execute( stack_operation: str, disable_rollback: bool, on_failure: FailureMode = FailureMode.ROLLBACK, + time_stamp_marker: float = 0, ) -> None: """ Wait for stack operation to execute and return when execution completes. @@ -481,6 +500,8 @@ def wait_for_execute( Preserves the state of previously provisioned resources when an operation fails on_failure : FailureMode The action to take when the operation fails + time_stamp_marker: + last event time on the stack to start streaming events from. """ sys.stdout.write( "\n{} - Waiting for stack create/update " @@ -488,7 +509,7 @@ def wait_for_execute( ) sys.stdout.flush() - self.describe_stack_events(stack_name, time.time() * 1000, on_failure) + self.describe_stack_events(stack_name, time_stamp_marker, on_failure) # Pick the right waiter if stack_operation == "CREATE": @@ -624,9 +645,12 @@ def sync( if exists: kwargs["DisableRollback"] = disable_rollback # type: ignore - + # get the latest stack event, and use 0 in case if the stack does not exist + marker_time = self.get_last_event_time(stack_name, 0) result = self.update_stack(**kwargs) - self.wait_for_execute(stack_name, "UPDATE", disable_rollback, on_failure=on_failure) + self.wait_for_execute( + stack_name, "UPDATE", disable_rollback, on_failure=on_failure, time_stamp_marker=marker_time + ) msg = "\nStack update succeeded. Sync infra completed.\n" else: # Pass string representation of enum @@ -701,8 +725,10 @@ def rollback_delete_stack(self, stack_name: str): if current_state == "UPDATE_FAILED": LOG.info("Stack %s failed to update, rolling back stack to previous state...", stack_name) + # get the latest stack event + marker_time = self.get_last_event_time(stack_name, 0) self._client.rollback_stack(**kwargs) - self.describe_stack_events(stack_name, time.time() * 1000, FailureMode.DELETE) + self.describe_stack_events(stack_name, marker_time, FailureMode.DELETE) self._rollback_wait(stack_name) current_state = self._get_stack_status(stack_name) @@ -712,13 +738,15 @@ def rollback_delete_stack(self, stack_name: str): if current_state in failed_states: LOG.info("Stack %s failed to create/update correctly, deleting stack", stack_name) + # get the latest stack event + marker_time = self.get_last_event_time(stack_name, 0) self._client.delete_stack(**kwargs) # only a stack that failed to create will have stack events, deleting # from a ROLLBACK_COMPLETE state will not return anything # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudformation.html#CloudFormation.Client.delete_stack if current_state == "CREATE_FAILED": - self.describe_stack_events(stack_name, time.time() * 1000, FailureMode.DELETE) + self.describe_stack_events(stack_name, marker_time, FailureMode.DELETE) waiter = self._client.get_waiter("stack_delete_complete") waiter.wait(StackName=stack_name, WaiterConfig={"Delay": 30, "MaxAttempts": 120}) diff --git a/tests/unit/commands/deploy/test_deploy_context.py b/tests/unit/commands/deploy/test_deploy_context.py index 495416b5dc..cf9e22f2f3 100644 --- a/tests/unit/commands/deploy/test_deploy_context.py +++ b/tests/unit/commands/deploy/test_deploy_context.py @@ -257,6 +257,7 @@ def test_on_failure_delete_rollback_stack(self, mock_client, mock_session): @patch.object(Deployer, "execute_changeset", MagicMock()) @patch.object(Deployer, "wait_for_execute", MagicMock()) @patch.object(Deployer, "create_and_wait_for_changeset", MagicMock(return_value=({"Id": "test"}, "CREATE"))) + @patch.object(Deployer, "get_last_event_time", MagicMock(return_value=1000)) def test_on_failure_do_nothing(self, mock_session, mock_client): with tempfile.NamedTemporaryFile(delete=False) as template_file: template_file.write(b"{}") @@ -268,5 +269,5 @@ def test_on_failure_do_nothing(self, mock_session, mock_client): self.deploy_command_context.run() self.deploy_command_context.deployer.wait_for_execute.assert_called_with( - ANY, "CREATE", False, FailureMode.DO_NOTHING + ANY, "CREATE", False, FailureMode.DO_NOTHING, 1000 ) diff --git a/tests/unit/lib/deploy/test_deployer.py b/tests/unit/lib/deploy/test_deployer.py index 9844366084..ae505ff546 100644 --- a/tests/unit/lib/deploy/test_deployer.py +++ b/tests/unit/lib/deploy/test_deployer.py @@ -420,10 +420,11 @@ def test_get_last_event_time(self): self.assertEqual(self.deployer.get_last_event_time("test"), utc_to_timestamp(timestamp)) def test_get_last_event_time_unknown_last_time(self): - current_timestamp = datetime.utcnow() + time_float = time.time() + current_timestamp = to_datetime(time_float * 1000) self.deployer._client.describe_stack_events = MagicMock(side_effect=KeyError) # Convert to milliseconds from seconds - last_stack_event_timestamp = to_datetime(self.deployer.get_last_event_time("test") * 1000) + last_stack_event_timestamp = to_datetime(self.deployer.get_last_event_time("test", time_float) * 1000) self.assertEqual(last_stack_event_timestamp.year, current_timestamp.year) self.assertEqual(last_stack_event_timestamp.month, current_timestamp.month) self.assertEqual(last_stack_event_timestamp.day, current_timestamp.day) @@ -1384,7 +1385,7 @@ def test_rollback_stack_update_stack_delete(self): self.assertEqual(self.deployer._client.rollback_stack.call_count, 1) self.assertEqual(self.deployer._client.delete_stack.call_count, 1) - self.assertEqual(self.deployer._client.describe_stack_events.call_count, 0) + self.assertEqual(self.deployer._client.describe_stack_events.call_count, 2) def test_rollback_invalid_stack_name(self): self.deployer._client.describe_stacks = MagicMock(