From e26c7f467a5056613566ebca5789ea7c854c631e Mon Sep 17 00:00:00 2001 From: tanmoysrt <57363826+tanmoysrt@users.noreply.github.com> Date: Mon, 20 Jan 2025 17:03:05 +0530 Subject: [PATCH] refactor(physical-restore): Always run cleanup on failure --- .../physical_backup_restoration.js | 1 + .../physical_backup_restoration.py | 71 +++++++++++++------ .../physical_backup_restoration_step.json | 9 ++- .../physical_backup_restoration_step.py | 1 + 4 files changed, 59 insertions(+), 23 deletions(-) diff --git a/press/press/doctype/physical_backup_restoration/physical_backup_restoration.js b/press/press/doctype/physical_backup_restoration/physical_backup_restoration.js index 4a85f52b05..018791f806 100644 --- a/press/press/doctype/physical_backup_restoration/physical_backup_restoration.js +++ b/press/press/doctype/physical_backup_restoration/physical_backup_restoration.js @@ -6,6 +6,7 @@ frappe.ui.form.on('Physical Backup Restoration', { [ [__('Start'), 'execute', frm.doc.status === 'Pending'], [__('Force Continue'), 'force_continue', frm.doc.status === 'Failure'], + [__('Cleanup'), 'cleanup', frm.doc.status === 'Failure'], [__('Force Fail'), 'force_fail', frm.doc.status === 'Running'], ].forEach(([label, method, condition]) => { if (condition) { diff --git a/press/press/doctype/physical_backup_restoration/physical_backup_restoration.py b/press/press/doctype/physical_backup_restoration/physical_backup_restoration.py index dc9c11148e..2876b5a7bb 100644 --- a/press/press/doctype/physical_backup_restoration/physical_backup_restoration.py +++ b/press/press/doctype/physical_backup_restoration/physical_backup_restoration.py @@ -22,7 +22,7 @@ from press.press.doctype.virtual_disk_snapshot.virtual_disk_snapshot import VirtualDiskSnapshot from press.press.doctype.virtual_machine.virtual_machine import VirtualMachine -StepStatus = Enum("StepStatus", ["Pending", "Running", "Success", "Failure"]) +StepStatus = Enum("StepStatus", ["Pending", "Running", "Skipped", "Success", "Failure"]) class PhysicalBackupRestoration(Document): @@ -68,32 +68,35 @@ def migration_steps(self): NoWait = False SyncStep = False AsyncStep = True + GeneralStep = False + CleanupStep = True methods = [ - (self.wait_for_pending_snapshot_to_be_completed, Wait, SyncStep), - (self.create_volume_from_snapshot, NoWait, SyncStep), - (self.wait_for_volume_to_be_available, Wait, SyncStep), - (self.attach_volume_to_instance, NoWait, SyncStep), - (self.create_mount_point, NoWait, SyncStep), - (self.mount_volume_to_instance, NoWait, SyncStep), - (self.change_permission_of_backup_directory, NoWait, SyncStep), - (self.change_permission_of_database_directory, NoWait, SyncStep), - (self.restore_database, Wait, AsyncStep), - (self.rollback_permission_change_of_database_directory, NoWait, SyncStep), - (self.unmount_volume_from_instance, NoWait, SyncStep), - (self.delete_mount_point, NoWait, SyncStep), - (self.detach_volume_from_instance, NoWait, SyncStep), - (self.wait_for_volume_to_be_detached, Wait, SyncStep), - (self.delete_volume, NoWait, SyncStep), + (self.wait_for_pending_snapshot_to_be_completed, Wait, SyncStep, GeneralStep), + (self.create_volume_from_snapshot, NoWait, SyncStep, GeneralStep), + (self.wait_for_volume_to_be_available, Wait, SyncStep, GeneralStep), + (self.attach_volume_to_instance, NoWait, SyncStep, GeneralStep), + (self.create_mount_point, NoWait, SyncStep, GeneralStep), + (self.mount_volume_to_instance, NoWait, SyncStep, GeneralStep), + (self.change_permission_of_backup_directory, NoWait, SyncStep, GeneralStep), + (self.change_permission_of_database_directory, NoWait, SyncStep, GeneralStep), + (self.restore_database, Wait, AsyncStep, GeneralStep), + (self.rollback_permission_of_database_directory, NoWait, SyncStep, CleanupStep), + (self.unmount_volume_from_instance, NoWait, SyncStep, CleanupStep), + (self.delete_mount_point, NoWait, SyncStep, CleanupStep), + (self.detach_volume_from_instance, NoWait, SyncStep, CleanupStep), + (self.wait_for_volume_to_be_detached, Wait, SyncStep, CleanupStep), + (self.delete_volume, NoWait, SyncStep, CleanupStep), ] steps = [] - for method, wait_for_completion, is_async in methods: + for method, wait_for_completion, is_async, is_cleanup_step in methods: steps.append( { "step": method.__doc__, "method": method.__name__, "wait_for_completion": wait_for_completion, "is_async": is_async, + "is_cleanup_step": is_cleanup_step, } ) return steps @@ -294,7 +297,7 @@ def restore_database(self) -> StepStatus: return StepStatus.Success return StepStatus.Failure - def rollback_permission_change_of_database_directory(self) -> StepStatus: + def rollback_permission_of_database_directory(self) -> StepStatus: """Rollback permission of database directory""" # Docs > https://mariadb.com/kb/en/specifying-permissions-for-schema-data-directories-and-tables/ @@ -309,7 +312,7 @@ def rollback_permission_change_of_database_directory(self) -> StepStatus: def unmount_volume_from_instance(self) -> StepStatus: """Unmount volume from instance""" - if self.get_step_status(self.mount_volume_to_instance) != StepStatus.Success: + if self.get_step_status(self.mount_volume_to_instance) != StepStatus.Success.name: return StepStatus.Success response = self.ansible_run(f"umount {self.mount_point}") if response["status"] != "Success": @@ -353,7 +356,10 @@ def wait_for_volume_to_be_detached(self) -> StepStatus: def delete_volume(self) -> StepStatus: """Delete volume""" - if not self.volume or self.get_step_status(self.create_volume_from_snapshot) != StepStatus.Success: + if ( + not self.volume + or self.get_step_status(self.create_volume_from_snapshot) != StepStatus.Success.name + ): return StepStatus.Success state = self.virtual_machine.get_state_of_volume(self.volume) if state in ["deleting", "deleted"]: @@ -385,9 +391,14 @@ def fail(self) -> None: self.end = frappe.utils.now_datetime() self.duration = (self.end - self.start).total_seconds() self.save() + self.cleanup() - def succeed(self) -> None: + def finish(self) -> None: self.status = "Success" + # If any step is failed, then mark the job as failed + for step in self.steps: + if step.status == "Failure": + self.status = "Failure" self.end = frappe.utils.now_datetime() self.duration = (self.end - self.start).total_seconds() self.save() @@ -400,7 +411,7 @@ def next(self, ignore_version=False) -> None: if not next_step: # We've executed everything - self.succeed() + self.finish() return frappe.enqueue_doc( @@ -412,6 +423,22 @@ def next(self, ignore_version=False) -> None: at_front=True, ) + @frappe.whitelist() + def cleanup(self): + is_cleanup_required = False + for step in self.steps: + # Mark the pending non-cleanup steps as skipped + if not step.is_cleanup_step and step.status == "Pending": + step.status = "Skipped" + + # Mark the cleanup steps with non-failure status as pending + if step.is_cleanup_step and step.status != "Failure": + step.status = "Pending" + is_cleanup_required = True + + if is_cleanup_required: + self.next() + @frappe.whitelist() def force_continue(self) -> None: first_failed_step: PhysicalBackupRestorationStep = None diff --git a/press/press/doctype/physical_backup_restoration_step/physical_backup_restoration_step.json b/press/press/doctype/physical_backup_restoration_step/physical_backup_restoration_step.json index f5b40d6b9f..8e59a285e2 100644 --- a/press/press/doctype/physical_backup_restoration_step/physical_backup_restoration_step.json +++ b/press/press/doctype/physical_backup_restoration_step/physical_backup_restoration_step.json @@ -17,6 +17,7 @@ "wait_for_completion", "is_async", "attempts", + "is_cleanup_step", "section_break_vyao", "traceback" ], @@ -90,12 +91,18 @@ "fieldname": "is_async", "fieldtype": "Check", "label": "Is Async" + }, + { + "default": "0", + "fieldname": "is_cleanup_step", + "fieldtype": "Check", + "label": "Is Cleanup Step" } ], "index_web_pages_for_search": 1, "istable": 1, "links": [], - "modified": "2025-01-20 15:38:21.354247", + "modified": "2025-01-20 16:48:34.338658", "modified_by": "Administrator", "module": "Press", "name": "Physical Backup Restoration Step", diff --git a/press/press/doctype/physical_backup_restoration_step/physical_backup_restoration_step.py b/press/press/doctype/physical_backup_restoration_step/physical_backup_restoration_step.py index aed78bb160..0940ecf923 100644 --- a/press/press/doctype/physical_backup_restoration_step/physical_backup_restoration_step.py +++ b/press/press/doctype/physical_backup_restoration_step/physical_backup_restoration_step.py @@ -20,6 +20,7 @@ class PhysicalBackupRestorationStep(Document): duration: DF.Duration | None end: DF.Datetime | None is_async: DF.Check + is_cleanup_step: DF.Check method: DF.Data parent: DF.Data parentfield: DF.Data