From 352710e9bb01e5401321ccfce8e73180ce546021 Mon Sep 17 00:00:00 2001 From: Aditya Hase Date: Sun, 19 Jan 2025 14:53:38 +0530 Subject: [PATCH 01/12] fix(vm-migration): Update TLS certificate after migration Virtual Machine Image might have older/expired certificates --- .../virtual_machine_migration.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/press/infrastructure/doctype/virtual_machine_migration/virtual_machine_migration.py b/press/infrastructure/doctype/virtual_machine_migration/virtual_machine_migration.py index b85b75b140..8bffe3f2cb 100644 --- a/press/infrastructure/doctype/virtual_machine_migration/virtual_machine_migration.py +++ b/press/infrastructure/doctype/virtual_machine_migration/virtual_machine_migration.py @@ -282,6 +282,7 @@ def migration_steps(self): (self.update_mounts, NoWait), (self.update_bind_mount_permissions, NoWait), (self.update_plan, NoWait), + (self.update_tls_certificate, NoWait), ] steps = [] @@ -496,6 +497,24 @@ def update_plan(self) -> StepStatus: server._change_plan(plan) return StepStatus.Success + def update_tls_certificate(self) -> StepStatus: + "Update TLS certificate" + server = self.machine.get_server() + server.update_tls_certificate() + + plays = frappe.get_all( + "Ansible Play", + {"server": server.name, "play": "Setup TLS Certificates", "creation": (">", self.creation)}, + ["status"], + order_by="creation desc", + limit=1, + ) + if not plays: + return StepStatus.Failure + if plays[0].status == "Success": + return StepStatus.Success + return StepStatus.Failure + @frappe.whitelist() def execute(self): self.status = "Running" From ae70a30138057ce37ba59ae624afd0d2d7522676 Mon Sep 17 00:00:00 2001 From: Aditya Hase Date: Sun, 19 Jan 2025 16:04:30 +0530 Subject: [PATCH 02/12] fix(vm-migration): Sync copied machine if it's stuck in Pending state Migration might get stuck if the copied machine is in Pending state Reference: https://frappecloud.com/app/virtual-machine-migration/92 --- .../virtual_machine_migration/virtual_machine_migration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/press/infrastructure/doctype/virtual_machine_migration/virtual_machine_migration.py b/press/infrastructure/doctype/virtual_machine_migration/virtual_machine_migration.py index 8bffe3f2cb..3dfe50ff3b 100644 --- a/press/infrastructure/doctype/virtual_machine_migration/virtual_machine_migration.py +++ b/press/infrastructure/doctype/virtual_machine_migration/virtual_machine_migration.py @@ -360,6 +360,7 @@ def terminate_previous_machine(self) -> StepStatus: if copied_machine.status == "Terminated": return StepStatus.Success if copied_machine.status == "Pending": + copied_machine.sync() return StepStatus.Pending copied_machine.disable_termination_protection() From 368c37951655c8146ab9b34520c3df00bb2eb4c5 Mon Sep 17 00:00:00 2001 From: Bread Genie Date: Sun, 19 Jan 2025 19:09:46 +0530 Subject: [PATCH 03/12] fix(group-sites): use proper function name In 32f1ed0 the function call was renamed but the function wasn't. --- dashboard/src2/pages/ReleaseGroupBenchSites.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dashboard/src2/pages/ReleaseGroupBenchSites.vue b/dashboard/src2/pages/ReleaseGroupBenchSites.vue index 35f55ebae5..1ac0b8f95b 100644 --- a/dashboard/src2/pages/ReleaseGroupBenchSites.vue +++ b/dashboard/src2/pages/ReleaseGroupBenchSites.vue @@ -428,7 +428,7 @@ export default { }, ]; }, - runDocMethod(name, methodName) { + runBenchMethod(name, methodName) { const method = createResource({ url: 'press.api.client.run_doc_method', }); From f30aff6984a8ff28906913c68276d81a8b62277e Mon Sep 17 00:00:00 2001 From: Aditya Hase Date: Mon, 20 Jan 2025 11:38:12 +0530 Subject: [PATCH 04/12] feat(agent): Add action to cancel or stop a job --- press/agent.py | 3 +++ press/press/doctype/agent_job/agent_job.js | 1 + press/press/doctype/agent_job/agent_job.py | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/press/agent.py b/press/agent.py index a44689ed8b..1f026a323c 100644 --- a/press/agent.py +++ b/press/agent.py @@ -964,6 +964,9 @@ def update_monitor_rules(self, rules, routes): def get_job_status(self, id): return self.get(f"jobs/{id}") + def cancel_job(self, id): + return self.post(f"jobs/{id}/cancel") + def get_site_sid(self, site, user=None): if user: data = {"user": user} diff --git a/press/press/doctype/agent_job/agent_job.js b/press/press/doctype/agent_job/agent_job.js index e4f8c7d79d..93a4803dca 100644 --- a/press/press/doctype/agent_job/agent_job.js +++ b/press/press/doctype/agent_job/agent_job.js @@ -52,6 +52,7 @@ frappe.ui.form.on('Agent Job', { __('Succeed and Process Job Updates'), 'succeed_and_process_job_updates', ], + [__('Cancel Job'), 'cancel_job', ['Pending', "Running"].includes(frm.doc.status)], ].forEach(([label, method, condition]) => { frm.add_custom_button( label, diff --git a/press/press/doctype/agent_job/agent_job.py b/press/press/doctype/agent_job/agent_job.py index 760a0dd7b8..ef5bb273d8 100644 --- a/press/press/doctype/agent_job/agent_job.py +++ b/press/press/doctype/agent_job/agent_job.py @@ -307,6 +307,11 @@ def fail_and_process_job_updates(self): def process_job_updates(self): process_job_updates(self.name) + @frappe.whitelist() + def cancel_job(self): + agent = Agent(self.server, server_type=self.server_type) + agent.cancel_job(self.job_id) + def on_trash(self): steps = frappe.get_all("Agent Job Step", filters={"agent_job": self.name}) for step in steps: From 9c5ffce44ecc6eedbcd1c690c38a84597e30146f Mon Sep 17 00:00:00 2001 From: Balamurali M Date: Mon, 20 Jan 2025 12:02:58 +0530 Subject: [PATCH 05/12] fix(site-migration): Only allow if all domains are active --- .../doctype/site_migration/site_migration.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/press/press/doctype/site_migration/site_migration.py b/press/press/doctype/site_migration/site_migration.py index 659b0cc74b..790f2d4017 100644 --- a/press/press/doctype/site_migration/site_migration.py +++ b/press/press/doctype/site_migration/site_migration.py @@ -12,6 +12,7 @@ from press.agent import Agent from press.exceptions import ( CannotChangePlan, + InactiveDomains, InsufficientSpaceOnServer, MissingAppsInBench, OngoingAgentJob, @@ -123,11 +124,21 @@ def validate_apps(self): MissingAppsInBench, ) + def check_for_inactive_domains(self): + if domains := frappe.db.exists( + "Site Domain", {"site": self.site, "status": ("!=", "Active")}, pluck="name" + ): + frappe.throw( + f"Inactive custom domains exist: {','.join(domains)}. Please remove or fix the same.", + InactiveDomains, + ) + @frappe.whitelist() def start(self): self.status = "Pending" self.save() self.check_for_ongoing_agent_jobs() + self.check_for_inactive_domains() self.validate_apps() self.check_enough_space_on_destination_server() site: Site = frappe.get_doc("Site", self.site) @@ -683,11 +694,13 @@ def run_scheduled_migrations(): try: site_migration.start() except OngoingAgentJob: - pass + pass # ongoing jobs will finish in some time except MissingAppsInBench as e: site_migration.cleanup_and_fail(reason=str(e), force_activate=True) except InsufficientSpaceOnServer as e: site_migration.cleanup_and_fail(reason=str(e), force_activate=True) + except InactiveDomains as e: + site_migration.cleanup_and_fail(reason=str(e), force_activate=True) except Exception as e: log_error("Site Migration Start Error", exception=e) From 8b04274fdb8199c446471a7bffcc69e6f45dc991 Mon Sep 17 00:00:00 2001 From: Balamurali M Date: Mon, 20 Jan 2025 12:06:31 +0530 Subject: [PATCH 06/12] fix(exceptions): Add missing exception --- press/exceptions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/press/exceptions.py b/press/exceptions.py index 02456c937a..831ab34ee3 100644 --- a/press/exceptions.py +++ b/press/exceptions.py @@ -51,3 +51,7 @@ class SiteUnderMaintenance(ValidationError): class SiteAlreadyArchived(ValidationError): pass + + +class InactiveDomains(ValidationError): + pass From 8b60220885c8f7b241e4ce94948725492da4beea Mon Sep 17 00:00:00 2001 From: Balamurali M Date: Mon, 20 Jan 2025 12:07:56 +0530 Subject: [PATCH 07/12] fix(site-migration): Don't allow scheduling either if inactive domains --- press/press/doctype/site_migration/site_migration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/press/press/doctype/site_migration/site_migration.py b/press/press/doctype/site_migration/site_migration.py index 790f2d4017..6297a44a80 100644 --- a/press/press/doctype/site_migration/site_migration.py +++ b/press/press/doctype/site_migration/site_migration.py @@ -77,6 +77,7 @@ class SiteMigration(Document): def before_insert(self): self.validate_apps() self.validate_bench() + self.check_for_inactive_domains() self.check_enough_space_on_destination_server() if get_ongoing_migration(self.site, scheduled=True): frappe.throw(f"Ongoing/Scheduled Site Migration for the site {frappe.bold(self.site)} exists.") From 07aece45fd8bbd4ae94195f45b35cfbc69f9f1cc Mon Sep 17 00:00:00 2001 From: Balamurali M Date: Mon, 20 Jan 2025 12:29:09 +0530 Subject: [PATCH 08/12] fix(site-migration): Use get_all instead of exists --- press/press/doctype/site_migration/site_migration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/press/press/doctype/site_migration/site_migration.py b/press/press/doctype/site_migration/site_migration.py index 6297a44a80..595755a074 100644 --- a/press/press/doctype/site_migration/site_migration.py +++ b/press/press/doctype/site_migration/site_migration.py @@ -126,7 +126,7 @@ def validate_apps(self): ) def check_for_inactive_domains(self): - if domains := frappe.db.exists( + if domains := frappe.db.get_all( "Site Domain", {"site": self.site, "status": ("!=", "Active")}, pluck="name" ): frappe.throw( From 5dfac8c5b8961508478cce0227d9fd0011ee0a9f Mon Sep 17 00:00:00 2001 From: Balamurali M Date: Mon, 20 Jan 2025 14:48:30 +0530 Subject: [PATCH 09/12] fix(ansible): Add "until" key to all blocks with retries This is necessary as the alternative only works after ansible 2.16 - Also add a semgrep rule to prevent addition of such cases in the future --- press-semgrep-rules.yml | 83 +++++++++++++------ press/playbooks/filebeat_update.yml | 2 + .../earlyoom_memory_limits/tasks/main.yml | 2 + .../roles/extend_ec2_volume/tasks/main.yml | 2 + .../roles/mariadb_10_4_to_10_6/tasks/main.yml | 2 + 5 files changed, 64 insertions(+), 27 deletions(-) diff --git a/press-semgrep-rules.yml b/press-semgrep-rules.yml index e21f91adde..854155568f 100644 --- a/press-semgrep-rules.yml +++ b/press-semgrep-rules.yml @@ -22,6 +22,7 @@ rules: - python references: - https://docs.python-guide.org/writing/gotchas/#mutable-default-arguments + - id: except-with-db-code languages: - python @@ -32,33 +33,33 @@ rules: except ...: $ERR_HANDL_BLK - pattern-either: - - pattern: | - try: - ... - except ...: - ... - $DOC.save(...) - ... - raise - ... - - pattern: | - try: - ... - except ...: - ... - frappe. ... .set_value(...) - ... - raise - ... - - pattern: | - try: - ... - except ...: - ... - $DOC.db_set(...) - ... - raise - ... + - pattern: | + try: + ... + except ...: + ... + $DOC.save(...) + ... + raise + ... + - pattern: | + try: + ... + except ...: + ... + frappe. ... .set_value(...) + ... + raise + ... + - pattern: | + try: + ... + except ...: + ... + $DOC.db_set(...) + ... + raise + ... - pattern-not: | try: ... @@ -95,3 +96,31 @@ rules: message: except block has no db commit before raise. The db changes made won't persist assuming innodb tables. severity: ERROR + - id: retries-without-until + languages: + - yaml + patterns: + - pattern: | + ... + retries: $RETRIES + delay: $DELAY + ... + + - pattern-not: | + ... + retries: $RETRIES + delay: $DELAY + until: $UNTIL + ... + + paths: + include: + - 'press/playbooks/**/*.yml' + - 'press/playbooks/*.yml' + message: retry block doesn't have until condition. Only works with ansible 2.16 and above. + severity: ERROR + metadata: + category: correctness + references: + - https://docs.ansible.com/ansible/latest/playbook_guide/playbooks_loops.html#retrying-a-task-until-a-condition-is-met + - https://docs.ansible.com/ansible/latest/reference_appendices/release_and_maintenance.html#ansible-community-changelogs diff --git a/press/playbooks/filebeat_update.yml b/press/playbooks/filebeat_update.yml index bf7635823b..bdf5f045c7 100644 --- a/press/playbooks/filebeat_update.yml +++ b/press/playbooks/filebeat_update.yml @@ -26,6 +26,8 @@ apt: name: filebeat state: latest + result: result + until: result.rc == 0 retries: 5 delay: 120 diff --git a/press/playbooks/roles/earlyoom_memory_limits/tasks/main.yml b/press/playbooks/roles/earlyoom_memory_limits/tasks/main.yml index feee4345e6..69d5b18350 100644 --- a/press/playbooks/roles/earlyoom_memory_limits/tasks/main.yml +++ b/press/playbooks/roles/earlyoom_memory_limits/tasks/main.yml @@ -5,6 +5,8 @@ - earlyoom state: present + register: result + until: result.rc == 0 retries: 5 delay: 120 diff --git a/press/playbooks/roles/extend_ec2_volume/tasks/main.yml b/press/playbooks/roles/extend_ec2_volume/tasks/main.yml index fabc0f84e7..de0652e82a 100644 --- a/press/playbooks/roles/extend_ec2_volume/tasks/main.yml +++ b/press/playbooks/roles/extend_ec2_volume/tasks/main.yml @@ -15,6 +15,8 @@ - name: Extend Partition command: 'growpart {{ device }} 1' + register: result + until: result.rc == 0 retries: 10 delay: 10 when: partitioned_disk diff --git a/press/playbooks/roles/mariadb_10_4_to_10_6/tasks/main.yml b/press/playbooks/roles/mariadb_10_4_to_10_6/tasks/main.yml index 2ba262c95a..eef97cafbc 100644 --- a/press/playbooks/roles/mariadb_10_4_to_10_6/tasks/main.yml +++ b/press/playbooks/roles/mariadb_10_4_to_10_6/tasks/main.yml @@ -41,6 +41,8 @@ - mariadb-client - libmariadbclient18 state: latest + register: result + until: result.rc == 0 retries: 5 delay: 120 From 8405200be075c32de9a3ab0c6b009bf6cfe3b9b7 Mon Sep 17 00:00:00 2001 From: Balamurali M Date: Mon, 20 Jan 2025 14:51:56 +0530 Subject: [PATCH 10/12] ci(semgrep): Remove redundant path --- press-semgrep-rules.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/press-semgrep-rules.yml b/press-semgrep-rules.yml index 854155568f..f875091868 100644 --- a/press-semgrep-rules.yml +++ b/press-semgrep-rules.yml @@ -116,7 +116,6 @@ rules: paths: include: - 'press/playbooks/**/*.yml' - - 'press/playbooks/*.yml' message: retry block doesn't have until condition. Only works with ansible 2.16 and above. severity: ERROR metadata: From 460e6525e3807d457cdb4a65dabe2288d9e588d6 Mon Sep 17 00:00:00 2001 From: Balamurali M Date: Mon, 20 Jan 2025 15:17:17 +0530 Subject: [PATCH 11/12] fix(release-group-variable): Make value a text field To add longer keys --- .../release_group_variable/release_group_variable.json | 4 ++-- .../release_group_variable/release_group_variable.py | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/press/press/doctype/release_group_variable/release_group_variable.json b/press/press/doctype/release_group_variable/release_group_variable.json index cadf0506de..beb7ba9afb 100644 --- a/press/press/doctype/release_group_variable/release_group_variable.json +++ b/press/press/doctype/release_group_variable/release_group_variable.json @@ -20,7 +20,7 @@ }, { "fieldname": "value", - "fieldtype": "Data", + "fieldtype": "Text", "in_list_view": 1, "label": "Value", "reqd": 1 @@ -36,7 +36,7 @@ "index_web_pages_for_search": 1, "istable": 1, "links": [], - "modified": "2024-02-11 14:56:12.851224", + "modified": "2025-01-20 15:12:45.664299", "modified_by": "Administrator", "module": "Press", "name": "Release Group Variable", diff --git a/press/press/doctype/release_group_variable/release_group_variable.py b/press/press/doctype/release_group_variable/release_group_variable.py index 1d5c7633b8..c56639a984 100644 --- a/press/press/doctype/release_group_variable/release_group_variable.py +++ b/press/press/doctype/release_group_variable/release_group_variable.py @@ -9,7 +9,7 @@ class ReleaseGroupVariable(Document): # begin: auto-generated types # This code is auto-generated. Do not modify anything in this block. - from typing import TYPE_CHECKING + from typing import TYPE_CHECKING, ClassVar if TYPE_CHECKING: from frappe.types import DF @@ -19,10 +19,10 @@ class ReleaseGroupVariable(Document): parent: DF.Data parentfield: DF.Data parenttype: DF.Data - value: DF.Data + value: DF.Text # end: auto-generated types - dashboard_fields = ["key", "value"] + dashboard_fields: ClassVar = ["key", "value"] @staticmethod def get_list_query(query, filters=None, **list_args): @@ -30,5 +30,4 @@ def get_list_query(query, filters=None, **list_args): query = query.where(environmentVariable.internal == 0).orderby( environmentVariable.key, order=frappe.qb.asc ) - configs = query.run(as_dict=True) - return configs + return query.run(as_dict=True) From 62f834ac13b61e8e48c779d0bf5cc8f32201cd84 Mon Sep 17 00:00:00 2001 From: Balamurali M Date: Mon, 20 Jan 2025 16:32:24 +0530 Subject: [PATCH 12/12] fix(site): Handle common exceptions during login_as_admin/team - https://support.frappe.io/helpdesk/tickets/29690 - https://trace.frappe.cloud/share/issue/a93f0c40fa7047f08b233c7421e895e8/ --- press/press/doctype/site/site.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/press/press/doctype/site/site.py b/press/press/doctype/site/site.py index 7242c9a0de..f8abf708d5 100644 --- a/press/press/doctype/site/site.py +++ b/press/press/doctype/site/site.py @@ -1444,6 +1444,15 @@ def get_login_sid(self, user="Administrator"): try: agent = Agent(self.server) sid = agent.get_site_sid(self, user) + except requests.HTTPError as e: + if "validate_ip_address" in str(e): + frappe.throw( + f"Login with {user}'s credentials is IP restricted. Please remove the same and try again.", + frappe.ValidationError, + ) + elif f"User {user} does not exist" in str(e): + frappe.throw(f"User {user} does not exist in the site", frappe.ValidationError) + raise e except AgentRequestSkippedException: frappe.throw( "Server is unresponsive. Please try again in some time.",