diff --git a/CHANGES.md b/CHANGES.md index 3e9a6d44ab0..a70cfee8406 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -35,6 +35,9 @@ Maintenance release. ### Fixes +[#5104](https://github.com/cylc/cylc-flow/pull/5104) - Fix retriggering of +failed tasks after a reload. + [#5131](https://github.com/cylc/cylc-flow/pull/5131) - Infer workflow run number for `workflow_state` xtrigger. diff --git a/cylc/flow/task_job_mgr.py b/cylc/flow/task_job_mgr.py index f4b2d1b6e73..7d18bd347a1 100644 --- a/cylc/flow/task_job_mgr.py +++ b/cylc/flow/task_job_mgr.py @@ -767,7 +767,10 @@ def _manip_task_jobs_callback( tasks = {} for itask in itasks: while itask.reload_successor is not None: + # Note submit number could be incremented since reload. + subnum = itask.submit_num itask = itask.reload_successor + itask.submit_num = subnum if itask.point is not None and itask.submit_num: submit_num = "%02d" % (itask.submit_num) tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask diff --git a/cylc/flow/task_pool.py b/cylc/flow/task_pool.py index 667eaa8af31..5d86663fd12 100644 --- a/cylc/flow/task_pool.py +++ b/cylc/flow/task_pool.py @@ -153,14 +153,10 @@ def stop_task_done(self): def _swap_out(self, itask): """Swap old task for new, during reload.""" - if itask.point in self.hidden_pool: - if itask.identity in self.hidden_pool[itask.point]: - self.hidden_pool[itask.point][itask.identity] = itask - self.hidden_pool_changed = True - elif ( - itask.point in self.main_pool - and itask.identity in self.main_pool[itask.point] - ): + if itask.identity in self.hidden_pool.get(itask.point, set()): + self.hidden_pool[itask.point][itask.identity] = itask + self.hidden_pool_changed = True + elif itask.identity in self.main_pool.get(itask.point, set()): self.main_pool[itask.point][itask.identity] = itask self.main_pool_changed = True diff --git a/tests/functional/reload/27-stall-retrigger.t b/tests/functional/reload/27-stall-retrigger.t new file mode 100644 index 00000000000..29771920f21 --- 
/dev/null +++ b/tests/functional/reload/27-stall-retrigger.t @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE. +# Copyright (C) NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +#------------------------------------------------------------------------------- + +# Test retriggering a failed task after fixing the bug and reloading. +# It should run correctly with the updated settings. + +# https://github.com/cylc/cylc-flow/issues/5103 + +. "$(dirname "$0")/test_header" +set_test_number 2 +reftest diff --git a/tests/functional/reload/27-stall-retrigger/bin/stall-handler.sh b/tests/functional/reload/27-stall-retrigger/bin/stall-handler.sh new file mode 100755 index 00000000000..f9c14d60db2 --- /dev/null +++ b/tests/functional/reload/27-stall-retrigger/bin/stall-handler.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Change "script = false" -> "true" in 1/foo, then reload and retrigger it. + +if grep "\[command\] reload_workflow" "${CYLC_WORKFLOW_LOG_DIR}/log" >/dev/null; then + # Abort if not the first call (avoid an endless loop if the reload does not + # have the intended effect). 
+ >&2 echo "ERROR (stall-handler.sh): should only be called once" + cylc stop --now --now "${CYLC_WORKFLOW_ID}" + exit 1 +fi +sed -i "s/false/true/" "${CYLC_WORKFLOW_RUN_DIR}/suite.rc" +cylc reload "${CYLC_WORKFLOW_ID}" +cylc trigger "${CYLC_WORKFLOW_ID}//1/foo" diff --git a/tests/functional/reload/27-stall-retrigger/reference.log b/tests/functional/reload/27-stall-retrigger/reference.log new file mode 100644 index 00000000000..63d91f5f612 --- /dev/null +++ b/tests/functional/reload/27-stall-retrigger/reference.log @@ -0,0 +1,3 @@ +1/foo -triggered off [] in flow 1 +1/foo -triggered off [] in flow 1 +1/bar -triggered off ['1/foo'] in flow 1 diff --git a/tests/functional/reload/27-stall-retrigger/suite.rc b/tests/functional/reload/27-stall-retrigger/suite.rc new file mode 100644 index 00000000000..ac90faffa80 --- /dev/null +++ b/tests/functional/reload/27-stall-retrigger/suite.rc @@ -0,0 +1,13 @@ +# Use a stall handler to fix and reload the workflow config, then retrigger the +# failed task, which should run successfully with the new settings. +[scheduler] + [[events]] + stall handlers = stall-handler.sh + expected task failures = 1/foo +[scheduling] + [[graph]] + R1 = "foo => bar" +[runtime] + [[foo]] + script = false + [[bar]]