From 34b923cf8ac4a8d3768d4d8517bbb97aa75712c0 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Mon, 6 Nov 2023 17:42:39 +0100 Subject: [PATCH 01/69] added parameter change_wf in the load methods * this parameter allows to load workflow elements (e.g. Step) but loading them on a different workflow --- streamflow/core/utils.py | 27 +++++++++ streamflow/core/workflow.py | 74 +++++++++++++---------- streamflow/cwl/combinator.py | 5 +- streamflow/cwl/processor.py | 60 ++++++++++++++----- streamflow/cwl/step.py | 10 +++- streamflow/cwl/transformer.py | 17 ++++-- streamflow/workflow/combinator.py | 5 +- streamflow/workflow/step.py | 98 +++++++++++++++++++++++++------ 8 files changed, 223 insertions(+), 73 deletions(-) diff --git a/streamflow/core/utils.py b/streamflow/core/utils.py index de72a199f..0090fcdf7 100644 --- a/streamflow/core/utils.py +++ b/streamflow/core/utils.py @@ -16,7 +16,9 @@ TYPE_CHECKING, ) +from streamflow.core.context import StreamFlowContext from streamflow.core.exception import WorkflowExecutionException +from streamflow.core.persistence import DatabaseLoadingContext if TYPE_CHECKING: from streamflow.core.deployment import Connector, Location @@ -276,3 +278,28 @@ def random_name() -> str: def wrap_command(command: str): return ["/bin/sh", "-c", f"{command}"] + + +async def get_dependencies( + dependency_rows: MutableSequence[MutableMapping[str, Any]], + load_ports: bool, + context: StreamFlowContext, + loading_context: DatabaseLoadingContext, +): + if load_ports: + ports = await asyncio.gather( + *( + asyncio.create_task(loading_context.load_port(context, d["port"])) + for d in dependency_rows + ) + ) + return {d["name"]: p.name for d, p in zip(dependency_rows, ports)} + else: + # it is not helpful to have an instance in loading_context when it is building a new workflow + port_rows = await asyncio.gather( + *( + asyncio.create_task(context.database.get_port(d["port"])) + for d in dependency_rows + ) + ) + return {d["name"]: p["name"] for d, p in zip(dependency_rows, port_rows)} diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index 3e76398ce..01532fa5d 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -15,6 +15,7 @@ DependencyType, PersistableEntity, ) +from streamflow.core.utils import get_dependencies if TYPE_CHECKING: from streamflow.core.deployment import Connector, Location, Target @@ -102,10 +103,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] else None, @@ -126,11 +130,12 @@ async def load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow = None, ) -> CommandOutputProcessor: type = cast( Type[CommandOutputProcessor], utils.get_class_from_name(row["type"]) ) - return await type._load(context, row["params"], loading_context) + return await type._load(context, row["params"], loading_context, change_wf) @abstractmethod async def process( @@ -259,10 +264,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> Port: return cls( 
name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), ) async def _save_additional_params( @@ -306,12 +314,14 @@ async def load( context: StreamFlowContext, persistent_id: int, loading_context: DatabaseLoadingContext, + change_wf: Workflow = None, ) -> Port: row = await context.database.get_port(persistent_id) type = cast(Type[Port], utils.get_class_from_name(row["type"])) - port = await type._load(context, row, loading_context) - port.persistent_id = persistent_id - loading_context.add_port(persistent_id, port) + port = await type._load(context, row, loading_context, change_wf) + if not change_wf: + port.persistent_id = persistent_id + loading_context.add_port(persistent_id, port) return port def put(self, token: Token): @@ -364,10 +374,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ): return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), ) async def _save_additional_params( @@ -428,36 +441,29 @@ async def load( context: StreamFlowContext, persistent_id: int, loading_context: DatabaseLoadingContext, + change_wf: Workflow = None, ) -> Step: row = await context.database.get_step(persistent_id) type = cast(Type[Step], utils.get_class_from_name(row["type"])) - step = await type._load(context, row, loading_context) - step.persistent_id = persistent_id - step.status = Status(row["status"]) - step.terminated = step.status in [ - Status.COMPLETED, - Status.FAILED, - Status.SKIPPED, - ] + step = await type._load(context, row, loading_context, change_wf) + if not change_wf: + step.persistent_id = persistent_id + step.status = Status(row["status"]) + step.terminated = step.status in [ + Status.COMPLETED, + Status.FAILED, + Status.SKIPPED, + ] input_deps = await context.database.get_input_ports(persistent_id) - input_ports = await asyncio.gather( - *( - asyncio.create_task(loading_context.load_port(context, d["port"])) - for d in input_deps - ) + step.input_ports = await get_dependencies( + input_deps, change_wf is None, context, loading_context ) - step.input_ports = {d["name"]: p.name for d, p in zip(input_deps, input_ports)} output_deps = await context.database.get_output_ports(persistent_id) - output_ports = await asyncio.gather( - *( - asyncio.create_task(loading_context.load_port(context, d["port"])) - for d in output_deps - ) + step.output_ports = await get_dependencies( + output_deps, change_wf is None, context, loading_context ) - step.output_ports = { - d["name"]: p.name for d, p in zip(output_deps, output_ports) - } - loading_context.add_step(persistent_id, step) + if not change_wf: + loading_context.add_step(persistent_id, step) return step @abstractmethod @@ -587,10 +593,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> TokenProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), ) async def _save_additional_params(self, context: StreamFlowContext): @@ -602,9 +611,10 @@ async def load( context: StreamFlowContext, row: 
MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow = None, ): type = cast(Type[TokenProcessor], utils.get_class_from_name(row["type"])) - return await type._load(context, row["params"], loading_context) + return await type._load(context, row["params"], loading_context, change_wf) @abstractmethod async def process(self, inputs: MutableMapping[str, Token], token: Token) -> Token: diff --git a/streamflow/cwl/combinator.py b/streamflow/cwl/combinator.py index 1a7cdaf7c..2553ca9d9 100644 --- a/streamflow/cwl/combinator.py +++ b/streamflow/cwl/combinator.py @@ -41,10 +41,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow = None, ) -> ListMergeCombinator: return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), input_names=row["input_names"], output_name=row["output_name"], flatten=row["flatten"], diff --git a/streamflow/cwl/processor.py b/streamflow/cwl/processor.py index 7c194bd1a..b90c3b9d1 100644 --- a/streamflow/cwl/processor.py +++ b/streamflow/cwl/processor.py @@ -149,11 +149,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CWLTokenProcessor: format_graph = Graph() return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), token_type=row["token_type"], check_type=row["check_type"], enum_symbols=row["enum_symbols"], @@ -163,7 +166,7 @@ async def _load( format_graph.parse(data=row["format_graph"]) if row["format_graph"] is not None else format_graph - ), + ), # todo: fix multiple instance full_js=row["full_js"], load_contents=row["load_contents"], load_listing=LoadListing(row["load_listing"]) @@ -388,10 +391,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] else None, @@ -726,12 +732,15 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CWLMapTokenProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), processor=await TokenProcessor.load( - context, row["processor"], loading_context + context, row["processor"], loading_context, change_wf ), optional=row["optional"], ) @@ -788,12 +797,15 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), processor=await 
CommandOutputProcessor.load( - context, row["processor"], loading_context + context, row["processor"], loading_context, change_wf ), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] @@ -861,10 +873,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CWLObjectTokenProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), processors={ k: v for k, v in zip( @@ -872,7 +887,9 @@ async def _load( await asyncio.gather( *( asyncio.create_task( - TokenProcessor.load(context, v, loading_context) + TokenProcessor.load( + context, v, loading_context, change_wf + ) ) for v in row["processors"].values() ) @@ -961,11 +978,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CommandOutputProcessor: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] else None, @@ -976,7 +996,9 @@ async def _load( await asyncio.gather( *( asyncio.create_task( - CommandOutputProcessor.load(context, v, loading_context) + CommandOutputProcessor.load( + context, v, loading_context, change_wf + ) ) for v in row["processors"].values() ) @@ -1143,16 +1165,19 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CWLUnionTokenProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), processors=cast( MutableSequence[TokenProcessor], await asyncio.gather( *( asyncio.create_task( - TokenProcessor.load(context, p, loading_context) + TokenProcessor.load(context, p, loading_context, change_wf) ) for p in row["processors"] ) @@ -1260,16 +1285,21 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), processors=cast( MutableSequence[CommandOutputProcessor], await asyncio.gather( *( asyncio.create_task( - CommandOutputProcessor.load(context, p, loading_context) + CommandOutputProcessor.load( + context, p, loading_context, change_wf + ) ) for p in row["processors"] ) diff --git a/streamflow/cwl/step.py b/streamflow/cwl/step.py index cfa21f9f8..e1559b627 100644 --- a/streamflow/cwl/step.py +++ b/streamflow/cwl/step.py @@ -137,11 +137,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CWLConditionalStep: params = json.loads(row["params"]) step = cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await 
loading_context.load_workflow(context, row["workflow"]), expression=params["expression"], expression_lib=params["expression_lib"], full_js=params["full_js"], @@ -218,11 +221,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), scatter_method=params["scatter_method"], ) diff --git a/streamflow/cwl/transformer.py b/streamflow/cwl/transformer.py index 22119495a..b3287a88f 100644 --- a/streamflow/cwl/transformer.py +++ b/streamflow/cwl/transformer.py @@ -46,11 +46,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), default_port=await loading_context.load_port( context, params["default_port"] ), @@ -116,14 +119,17 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), port_name=params["port_name"], processor=await TokenProcessor.load( - context, params["processor"], loading_context + context, params["processor"], loading_context, change_wf ), ) @@ -245,11 +251,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), port_name=params["port_name"], processor=await TokenProcessor.load( context, params["processor"], loading_context diff --git a/streamflow/workflow/combinator.py b/streamflow/workflow/combinator.py index 4165d94de..a67b7e8b5 100644 --- a/streamflow/workflow/combinator.py +++ b/streamflow/workflow/combinator.py @@ -26,10 +26,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CartesianProductCombinator: return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), depth=row["depth"], ) diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index 8e79e2742..1b15368fb 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -50,6 +50,22 @@ ) +async def _get_port( + context: StreamFlowContext, + port_id: int, + loading_context: DatabaseLoadingContext, + change_wf: Workflow, +): + if change_wf: + port_row = await context.database.get_port(port_id) + if port_row["name"] in change_wf.ports.keys(): + return change_wf.ports[port_row["name"]] + + # If the port is not available in the 
new workflow, a new one must be created + return await Port.load(context, port_id, loading_context, change_wf) + return await loading_context.load_port(context, port_id) + + def _get_directory(path_processor: ModuleType, directory: str | None, target: Target): return directory or path_processor.join(target.workdir, utils.random_name()) @@ -155,10 +171,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> Combinator: return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), ) async def _save_additional_params(self, context: StreamFlowContext): @@ -214,15 +233,18 @@ async def load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow = None, ) -> Combinator: type = cast(Combinator, utils.get_class_from_name(row["type"])) - combinator = await type._load(context, row["params"], loading_context) + combinator = await type._load( + context, row["params"], loading_context, change_wf + ) combinator.items = row["params"]["items"] combinator.combinators_map = row["params"]["combinators_map"] combinator.combinators = {} for k, c in row["params"]["combinators"].items(): combinator.combinators[k] = await Combinator.load( - context, c, loading_context + context, c, loading_context, change_wf ) return combinator @@ -282,13 +304,16 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> CombinatorStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), combinator=await Combinator.load( - context, params["combinator"], loading_context + context, params["combinator"], loading_context, change_wf ), ) @@ -450,17 +475,22 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> DeployStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), deployment_config=await loading_context.load_deployment( context, params["deployment_config"] ), connector_port=cast( ConnectorPort, - await loading_context.load_port(context, params["connector_port"]), + await _get_port( + context, params["connector_port"], loading_context, change_wf + ), ), ) @@ -561,13 +591,19 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> ExecuteStep: params = json.loads(row["params"]) step = cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), job_port=cast( - JobPort, await loading_context.load_port(context, params["job_port"]) + JobPort, + await _get_port( + context, params["job_port"], loading_context, change_wf + ), ), ) step.output_connectors = params["output_connectors"] @@ -578,7 +614,9 @@ async def _load( await asyncio.gather( *( 
asyncio.create_task( - CommandOutputProcessor.load(context, p, loading_context) + CommandOutputProcessor.load( + context, p, loading_context, change_wf + ) ) for p in params["output_processors"].values() ) @@ -837,11 +875,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> GatherStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), depth=params["depth"], ) @@ -922,13 +963,19 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> InputInjectorStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), job_port=cast( - JobPort, await loading_context.load_port(context, params["job_port"]) + JobPort, + await _get_port( + context, params["job_port"], loading_context, change_wf + ), ), ) @@ -1219,6 +1266,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> ScheduleStep: params = json.loads(row["params"]) if hardware_requirement := params.get("hardware_requirement"): @@ -1227,16 +1275,24 @@ async def _load( ) return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), binding_config=await BindingConfig.load( context, params["binding_config"], loading_context ), connector_ports={ - k: cast(ConnectorPort, await loading_context.load_port(context, v)) + k: cast( + ConnectorPort, + await _get_port(context, v, loading_context, change_wf), + ) for k, v in params["connector_ports"].items() }, job_port=cast( - JobPort, await loading_context.load_port(context, params["job_port"]) + JobPort, + await _get_port( + context, params["job_port"], loading_context, change_wf + ), ), job_prefix=params["job_prefix"], hardware_requirement=hardware_requirement, @@ -1506,13 +1562,19 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> TransferStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), job_port=cast( - JobPort, await loading_context.load_port(context, params["job_port"]) + JobPort, + await _get_port( + context, params["job_port"], loading_context, change_wf + ), ), ) From 9d0bdd28fb616dc4655979497ca1ffd0b4400839 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 7 Nov 2023 00:07:13 +0100 Subject: [PATCH 02/69] fix Config persistence --- streamflow/persistence/schemas/sqlite.sql | 15 +++++++-------- streamflow/workflow/step.py | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/streamflow/persistence/schemas/sqlite.sql b/streamflow/persistence/schemas/sqlite.sql index e3cd991e4..6a432f278 100644 --- a/streamflow/persistence/schemas/sqlite.sql +++ b/streamflow/persistence/schemas/sqlite.sql 
@@ -77,15 +77,14 @@ CREATE TABLE IF NOT EXISTS provenance ); -CREATE TABLE IF NOT EXISTS deployment +CREATE TABLE IF NOT EXISTS config ( - id INTEGER PRIMARY KEY, - name TEXT, - type TEXT, - config TEXT, - external INTEGER, - lazy INTEGER, - workdir TEXT + id INTEGER PRIMARY KEY, + name TEXT, + attr_type TEXT, + config TEXT, + type TEXT, + params TEXT ); diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index 1b15368fb..b56395693 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -483,7 +483,7 @@ async def _load( workflow=change_wf if change_wf else await loading_context.load_workflow(context, row["workflow"]), - deployment_config=await loading_context.load_deployment( + deployment_config=await loading_context.load_config( context, params["deployment_config"] ), connector_port=cast( From 29052b795aaa1d8bebe438016e89db89bc47152e Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 7 Nov 2023 00:46:45 +0100 Subject: [PATCH 03/69] refactory from deployment to config --- streamflow/persistence/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streamflow/persistence/base.py b/streamflow/persistence/base.py index 4fca0eb9b..b97471584 100644 --- a/streamflow/persistence/base.py +++ b/streamflow/persistence/base.py @@ -10,7 +10,7 @@ class CachedDatabase(Database, ABC): def __init__(self, context: StreamFlowContext): super().__init__(context) - self.deployment_cache: Cache = LRUCache(maxsize=sys.maxsize) + self.config_cache: Cache = LRUCache(maxsize=sys.maxsize) self.port_cache: Cache = LRUCache(maxsize=sys.maxsize) self.step_cache: Cache = LRUCache(maxsize=sys.maxsize) self.target_cache: Cache = LRUCache(maxsize=sys.maxsize) From a25a63e418913757e1813094d9bd86a6115cda76 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 7 Nov 2023 14:40:29 +0100 Subject: [PATCH 04/69] fix: config is read-only so workflows can share the obj with same id --- streamflow/workflow/step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index b56395693..d36a2b9e4 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -1279,7 +1279,7 @@ async def _load( if change_wf else await loading_context.load_workflow(context, row["workflow"]), binding_config=await BindingConfig.load( - context, params["binding_config"], loading_context + context, params["binding_config"], loading_context, change_wf ), connector_ports={ k: cast( From f24eacb564646afd67f57ca727de209d4dc18f20 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 7 Nov 2023 17:41:25 +0100 Subject: [PATCH 05/69] minor fix --- streamflow/cwl/processor.py | 6 ++++-- streamflow/workflow/step.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/streamflow/cwl/processor.py b/streamflow/cwl/processor.py index b90c3b9d1..19a58c566 100644 --- a/streamflow/cwl/processor.py +++ b/streamflow/cwl/processor.py @@ -165,7 +165,7 @@ async def _load( format_graph=( format_graph.parse(data=row["format_graph"]) if row["format_graph"] is not None - else format_graph + else None ), # todo: fix multiple instance full_js=row["full_js"], load_contents=row["load_contents"], @@ -303,7 +303,9 @@ async def _save_additional_params(self, context: StreamFlowContext): "expression_lib": self.expression_lib, "file_format": self.file_format, "format_graph": ( - self.format_graph.serialize() if self.format_graph else None + self.format_graph.serialize() + if self.format_graph is not None + else None ), 
"full_js": self.full_js, "load_contents": self.load_contents, diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index d36a2b9e4..b56395693 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -1279,7 +1279,7 @@ async def _load( if change_wf else await loading_context.load_workflow(context, row["workflow"]), binding_config=await BindingConfig.load( - context, params["binding_config"], loading_context, change_wf + context, params["binding_config"], loading_context ), connector_ports={ k: cast( From 3f3a10e184bd32453a54bd51d874f145f1d7e822 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 7 Nov 2023 17:45:44 +0100 Subject: [PATCH 06/69] moved some general-purpose methods in a utils file --- tests/utils/get_instances.py | 220 +++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 tests/utils/get_instances.py diff --git a/tests/utils/get_instances.py b/tests/utils/get_instances.py new file mode 100644 index 000000000..8b9461c74 --- /dev/null +++ b/tests/utils/get_instances.py @@ -0,0 +1,220 @@ +import asyncio +import os +import posixpath +import tempfile +from jinja2 import Template +from typing import MutableSequence, cast + +import asyncssh +import asyncssh.public_key +import pkg_resources + +from streamflow.core import utils +from streamflow.core.config import BindingConfig +from streamflow.core.context import StreamFlowContext +from streamflow.core.deployment import Target, DeploymentConfig, LOCAL_LOCATION +from streamflow.core.workflow import Workflow, Port +from streamflow.workflow.combinator import ( + DotProductCombinator, + LoopTerminationCombinator, + CartesianProductCombinator, +) +from streamflow.workflow.port import ConnectorPort +from streamflow.workflow.step import DeployStep, ScheduleStep + + +def get_docker_deployment_config(): + return DeploymentConfig( + name="alpine-docker", + type="docker", + config={"image": "alpine:3.16.2"}, + external=False, + lazy=False, + ) + + +async def get_deployment_config( + _context: StreamFlowContext, deployment_t: str +) -> DeploymentConfig: + if deployment_t == "local": + return get_local_deployment_config() + elif deployment_t == "docker": + return get_docker_deployment_config() + elif deployment_t == "kubernetes": + return get_kubernetes_deployment_config() + elif deployment_t == "singularity": + return get_singularity_deployment_config() + elif deployment_t == "ssh": + return await get_ssh_deployment_config(_context) + else: + raise Exception(f"{deployment_t} deployment type not supported") + + +def get_service(_context: StreamFlowContext, deployment_t: str) -> str | None: + if deployment_t == "local": + return None + elif deployment_t == "docker": + return None + elif deployment_t == "kubernetes": + return "sf-test" + elif deployment_t == "singularity": + return None + elif deployment_t == "ssh": + return None + else: + raise Exception(f"{deployment_t} deployment type not supported") + + +def get_kubernetes_deployment_config(): + with open(pkg_resources.resource_filename(__name__, "pod.jinja2")) as t: + template = Template(t.read()) + with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: + template.stream(name=utils.random_name()).dump(f.name) + return DeploymentConfig( + name="alpine-kubernetes", + type="kubernetes", + config={"files": [f.name]}, + external=False, + lazy=False, + ) + + +def get_singularity_deployment_config(): + return DeploymentConfig( + name="alpine-singularity", + type="singularity", + config={"image": 
"docker://alpine:3.16.2"}, + external=False, + lazy=False, + ) + + +async def get_ssh_deployment_config(_context: StreamFlowContext): + skey = asyncssh.public_key.generate_private_key( + alg_name="ssh-rsa", + comment="streamflow-test", + key_size=4096, + ) + public_key = skey.export_public_key().decode("utf-8") + with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: + skey.write_private_key(f.name) + docker_config = DeploymentConfig( + name="linuxserver-ssh-docker", + type="docker", + config={ + "image": "lscr.io/linuxserver/openssh-server", + "env": [f"PUBLIC_KEY={public_key}"], + "init": False, + "publish": ["2222:2222"], + }, + external=False, + lazy=False, + ) + await _context.deployment_manager.deploy(docker_config) + await asyncio.sleep(5) + return DeploymentConfig( + name="linuxserver-ssh", + type="ssh", + config={ + "nodes": [ + { + "checkHostKey": False, + "hostname": "127.0.0.1:2222", + "sshKey": f.name, + "username": "linuxserver.io", + } + ], + "maxConcurrentSessions": 10, + }, + external=False, + lazy=False, + ) + + +def get_local_deployment_config(): + return DeploymentConfig( + name=LOCAL_LOCATION, + type="local", + config={}, + external=True, + lazy=False, + workdir=os.path.realpath(tempfile.gettempdir()), + ) + + +def create_deploy_step(workflow, deployment_config=None): + connector_port = workflow.create_port(cls=ConnectorPort) + if not deployment_config: + deployment_config = get_docker_deployment_config() + return workflow.create_step( + cls=DeployStep, + name=posixpath.join("__deploy__", deployment_config.name), + deployment_config=deployment_config, + connector_port=connector_port, + ) + + +def create_schedule_step( + workflow: Workflow, + deploy_steps: MutableSequence[DeployStep], + binding_config: BindingConfig = None, +): + """It is necessary to pass in the correct order biding_config.targets and deploy_steps for the mapping""" + if not binding_config: + binding_config = BindingConfig( + targets=[ + Target( + deployment=deploy_step.deployment_config, + workdir=utils.random_name(), + ) + for deploy_step in deploy_steps + ] + ) + return workflow.create_step( + cls=ScheduleStep, + name=posixpath.join(utils.random_name(), "__schedule__"), + job_prefix="something", + connector_ports={ + target.deployment.name: deploy_step.get_output_port() + for target, deploy_step in zip(binding_config.targets, deploy_steps) + }, + binding_config=binding_config, + ) + + +async def create_workflow( + context: StreamFlowContext, num_port: int = 2 +) -> tuple[Workflow, tuple[Port]]: + workflow = Workflow( + context=context, type="cwl", name=utils.random_name(), config={} + ) + ports = [] + for _ in range(num_port): + ports.append(workflow.create_port()) + await workflow.save(context) + return workflow, tuple(cast(MutableSequence[Port], ports)) + + +def get_dot_combinator(): + return DotProductCombinator(name=utils.random_name(), workflow=None) + + +def get_cartesian_product_combinator(): + return CartesianProductCombinator(name=utils.random_name(), workflow=None) + + +def get_loop_terminator_combinator(): + c = LoopTerminationCombinator(name=utils.random_name(), workflow=None) + c.add_output_item("test1") + c.add_output_item("test2") + return c + + +def get_nested_crossproduct(): + combinator = DotProductCombinator(name=utils.random_name(), workflow=None) + c1 = CartesianProductCombinator(name=utils.random_name(), workflow=None) + c1.add_item("ext") + c1.add_item("inn") + items = c1.get_items(False) + combinator.add_combinator(c1, items) + return combinator From 
e9868402bbf42f881d90af5043757e3faf3e4d65 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 7 Nov 2023 17:46:22 +0100 Subject: [PATCH 07/69] added tests for change_wf --- streamflow/core/workflow.py | 10 +- tests/test_change_wf.py | 325 ++++++++++++++++++++++++++++++++++++ tests/test_cwl_change_wf.py | 215 ++++++++++++++++++++++++ 3 files changed, 548 insertions(+), 2 deletions(-) create mode 100644 tests/test_change_wf.py create mode 100644 tests/test_cwl_change_wf.py diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index 01532fa5d..4ce37c070 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -658,16 +658,22 @@ def create_port(self, cls: type[P] = Port, name: str = None, **kwargs) -> P: if name is None: name = str(uuid.uuid4()) port = cls(workflow=self, name=name, **kwargs) - self.ports[name] = port + self.add_port(port) return port def create_step(self, cls: type[S], name: str = None, **kwargs) -> S: if name is None: name = str(uuid.uuid4()) step = cls(name=name, workflow=self, **kwargs) - self.steps[name] = step + self.add_step(step) return step + def add_port(self, port: Port): + self.ports[port.name] = port + + def add_step(self, step: Step): + self.steps[step.name] = step + def get_output_port(self, name: str) -> Port: return self.ports[self.output_ports[name]] diff --git a/tests/test_change_wf.py b/tests/test_change_wf.py new file mode 100644 index 000000000..30e5324ee --- /dev/null +++ b/tests/test_change_wf.py @@ -0,0 +1,325 @@ +from typing import Type, cast, MutableSequence + +import pytest + +from streamflow.core import utils +from streamflow.core.config import BindingConfig, Config +from streamflow.core.context import StreamFlowContext +from streamflow.core.deployment import LocalTarget +from streamflow.core.workflow import Workflow, Port, Step +from streamflow.cwl.command import CWLCommand, CWLCommandToken +from streamflow.cwl.translator import _create_command_output_processor_base +from streamflow.persistence.loading_context import DefaultDatabaseLoadingContext +from streamflow.workflow.combinator import LoopCombinator +from streamflow.workflow.port import ConnectorPort, JobPort +from streamflow.workflow.step import ( + CombinatorStep, + ExecuteStep, + Combinator, + LoopCombinatorStep, + GatherStep, + ScatterStep, +) +from tests.conftest import ( + are_equals, + object_to_dict, +) +from tests.utils.get_instances import ( + create_workflow, + create_schedule_step, + create_deploy_step, + get_dot_combinator, + get_cartesian_product_combinator, + get_loop_terminator_combinator, + get_nested_crossproduct, +) + + +async def base_step_test_process( + workflow, step_cls, kwargs_step, context, test_are_eq=True +): + step = workflow.create_step(cls=step_cls, **kwargs_step) + await workflow.save(context) + new_workflow, new_step = await clone_step(step, workflow, context) + persistent_id_test(workflow, new_workflow, step, new_step) + if test_are_eq: + set_val_to_attributes(step, ["persistent_id", "workflow"], None) + set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + assert are_equals(step, new_step) + return step, new_workflow, new_step + + +def persistent_id_test(original_workflow, new_workflow, original_elem, new_elem): + assert original_workflow.persistent_id + assert new_workflow.persistent_id + assert original_workflow.persistent_id != new_workflow.persistent_id + if isinstance(original_elem, Step): + assert new_elem.name in new_workflow.steps.keys() + if isinstance(original_elem, Port): + assert 
new_elem.name in new_workflow.ports.keys() + assert original_elem.persistent_id != new_elem.persistent_id + assert new_elem.workflow.persistent_id == new_workflow.persistent_id + + +async def general_test_port(context: StreamFlowContext, cls_port: Type[Port]): + workflow = Workflow( + context=context, type="cwl", name=utils.random_name(), config={} + ) + port = workflow.create_port(cls_port) + await workflow.save(context) + assert workflow.persistent_id + assert port.persistent_id + + loading_context = DefaultDatabaseLoadingContext() + new_workflow = Workflow( + context=context, type="cwl", name=utils.random_name(), config={} + ) + new_port = await Port.load( + context, port.persistent_id, loading_context, new_workflow + ) + new_workflow.add_port(new_port) + await new_workflow.save(context) + persistent_id_test(workflow, new_workflow, port, new_port) + port.persistent_id = None + new_port.persistent_id = None + port.workflow = None + new_port.workflow = None + assert are_equals(port, new_port) + + +def set_val_to_attributes(elem, str_attributes: MutableSequence[str], val): + attrs = object_to_dict(elem) + for attr in str_attributes: + if attr in attrs.keys(): + setattr(elem, attr, val) + + +def set_workflow_in_combinator(combinator, workflow): + combinator.workflow = workflow + if not combinator.combinators: + return + for c in combinator.combinators.values(): + set_workflow_in_combinator(c, workflow) + + +async def clone_step(step, workflow, context): + new_workflow = Workflow( + context=context, type="cwl", name=utils.random_name(), config={} + ) + loading_context = DefaultDatabaseLoadingContext() + new_step = await Step.load( + context, step.persistent_id, loading_context, new_workflow + ) + new_workflow.add_step(new_step) + + # ports are not loaded in new_workflow. 
It is necessary to do it manually + for port in workflow.ports.values(): + new_workflow.add_port( + await Port.load(context, port.persistent_id, loading_context, new_workflow) + ) + await new_workflow.save(context) + return new_workflow, new_step + + +@pytest.mark.asyncio +async def test_port(context: StreamFlowContext): + """Test saving Port on database and re-load it in a new Workflow""" + await general_test_port(context, Port) + + +@pytest.mark.asyncio +async def test_job_port(context: StreamFlowContext): + """Test saving JobPort on database and re-load it in a new Workflow""" + await general_test_port(context, JobPort) + + +@pytest.mark.asyncio +async def test_connection_port(context: StreamFlowContext): + """Test saving ConnectorPort on database and re-load it in a new Workflow""" + await general_test_port(context, ConnectorPort) + + +@pytest.mark.asyncio +async def test_execute_step(context: StreamFlowContext): + """Test saving ExecuteStep on database and re-load it in a new Workflow""" + workflow, (job_port, in_port, out_port) = await create_workflow(context, num_port=3) + + in_port_name = "in-1" + out_port_name = "out-1" + step = workflow.create_step( + cls=ExecuteStep, name=utils.random_name(), job_port=cast(JobPort, job_port) + ) + step.command = CWLCommand( + step=step, + base_command=["echo"], + command_tokens=[CWLCommandToken(name=in_port_name, value=None)], + ) + step.add_output_port( + out_port_name, + out_port, + _create_command_output_processor_base( + out_port.name, + workflow, + None, + "string", + {}, + {"hints": {}, "requirements": {}}, + ), + ) + step.add_input_port(in_port_name, in_port) + await workflow.save(context) + new_workflow, new_step = await clone_step(step, workflow, context) + persistent_id_test(workflow, new_workflow, step, new_step) + + step.command.step = None + new_step.command.step = None + for original_processor, new_processor in zip( + step.output_processors.values(), new_step.output_processors.values() + ): + set_val_to_attributes(original_processor, ["persistent_id", "workflow"], None) + set_val_to_attributes(new_processor, ["persistent_id", "workflow"], None) + set_val_to_attributes(step, ["persistent_id", "workflow"], None) + set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio +async def test_schedule_step(context: StreamFlowContext): + """Test saving ScheduleStep on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=0))[0] + deploy_step = create_deploy_step(workflow) + step = create_schedule_step( + workflow, + [deploy_step, deploy_step], + BindingConfig( + targets=[LocalTarget(), LocalTarget()], + filters=[ + Config( + config={"hello": "world"}, name=utils.random_name(), type="shuffle" + ), + Config( + config={"ciao": "mondo"}, name=utils.random_name(), type="linear" + ), + ], + ), + ) + await workflow.save(context) + new_workflow, new_step = await clone_step(step, workflow, context) + persistent_id_test(workflow, new_workflow, step, new_step) + + for original_filter, new_filter in zip( + step.binding_config.filters, new_step.binding_config.filters + ): + set_val_to_attributes(original_filter, ["persistent_id", "workflow"], None) + set_val_to_attributes(new_filter, ["persistent_id", "workflow"], None) + set_val_to_attributes(step, ["persistent_id", "workflow"], None) + set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio 
+@pytest.mark.parametrize( + "combinator", + [ + get_dot_combinator(), + get_cartesian_product_combinator(), + get_loop_terminator_combinator(), + get_nested_crossproduct(), + ], + ids=[ + "dot_combinator", + "cartesian_product_combinator", + "loop_termination_combinator", + "nested_crossproduct", + ], +) +async def test_combinator_step(context: StreamFlowContext, combinator: Combinator): + """Test saving CombinatorStep on database and re-load it in a new Workflow""" + workflow, (in_port, out_port, in_port_2, out_port_2) = await create_workflow( + context, num_port=4 + ) + set_workflow_in_combinator(combinator, workflow) + step = workflow.create_step( + cls=CombinatorStep, + name=utils.random_name() + "-combinator", + combinator=combinator, + ) + port_name = "test" + step.add_input_port(port_name, in_port) + step.add_output_port(port_name, out_port) + + port_name_2 = f"{port_name}_2" + step.add_input_port(port_name_2, in_port_2) + step.add_output_port(port_name_2, out_port_2) + + await workflow.save(context) + new_workflow, new_step = await clone_step(step, workflow, context) + persistent_id_test(workflow, new_workflow, step, new_step) + + set_val_to_attributes(step, ["persistent_id", "workflow"], None) + set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + set_workflow_in_combinator(step.combinator, None) + set_workflow_in_combinator(new_step.combinator, None) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio +async def test_loop_combinator_step(context: StreamFlowContext): + """Test saving LoopCombinatorStep on database and re-load it in a new Workflow""" + workflow, (in_port, out_port, in_port_2, out_port_2) = await create_workflow( + context, num_port=4 + ) + name = utils.random_name() + step = workflow.create_step( + cls=LoopCombinatorStep, + name=name + "-combinator", + combinator=LoopCombinator(name=name, workflow=workflow), + ) + port_name = "test" + step.add_input_port(port_name, in_port) + step.add_output_port(port_name, out_port) + + port_name_2 = f"{port_name}_2" + step.add_input_port(port_name_2, in_port_2) + step.add_output_port(port_name_2, out_port_2) + + await workflow.save(context) + new_workflow, new_step = await clone_step(step, workflow, context) + persistent_id_test(workflow, new_workflow, step, new_step) + + set_val_to_attributes(step, ["persistent_id", "workflow"], None) + set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + set_workflow_in_combinator(step.combinator, None) + set_workflow_in_combinator(new_step.combinator, None) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio +async def test_deploy_step(context: StreamFlowContext): + """Test saving DeployStep on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=0))[0] + step = create_deploy_step(workflow) + await workflow.save(context) + new_workflow, new_step = await clone_step(step, workflow, context) + persistent_id_test(workflow, new_workflow, step, new_step) + + +@pytest.mark.asyncio +async def test_gather_step(context: StreamFlowContext): + """Test saving GatherStep on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=0))[0] + await base_step_test_process( + workflow, + GatherStep, + {"name": utils.random_name() + "-gather", "depth": 1}, + context, + ) + + +@pytest.mark.asyncio +async def test_scatter_step(context: StreamFlowContext): + """Test saving ScatterStep on database and re-load it in a new Workflow""" + workflow = (await 
create_workflow(context, num_port=0))[0] + await base_step_test_process( + workflow, ScatterStep, {"name": utils.random_name() + "-scatter"}, context + ) diff --git a/tests/test_cwl_change_wf.py b/tests/test_cwl_change_wf.py new file mode 100644 index 000000000..994a2a611 --- /dev/null +++ b/tests/test_cwl_change_wf.py @@ -0,0 +1,215 @@ +import posixpath + +import pytest + +from streamflow.core import utils +from streamflow.core.context import StreamFlowContext +from streamflow.cwl.combinator import ListMergeCombinator +from streamflow.cwl.processor import CWLTokenProcessor +from streamflow.cwl.step import ( + CWLConditionalStep, + CWLTransferStep, + CWLInputInjectorStep, + CWLEmptyScatterConditionalStep, + CWLLoopOutputAllStep, +) +from streamflow.cwl.transformer import ( + DefaultTransformer, + DefaultRetagTransformer, + CWLTokenTransformer, + LoopValueFromTransformer, +) +from streamflow.workflow.step import CombinatorStep +from tests.conftest import ( + are_equals, +) +from tests.test_change_wf import ( + persistent_id_test, + set_val_to_attributes, + base_step_test_process, + set_workflow_in_combinator, +) +from tests.utils.get_instances import create_workflow + + +@pytest.mark.asyncio +async def test_default_transformer(context: StreamFlowContext): + """Test saving DefaultTransformer on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + await base_step_test_process( + workflow, + DefaultTransformer, + {"name": utils.random_name() + "-transformer", "default_port": port}, + context, + ) + + +@pytest.mark.asyncio +async def test_default_retag_transformer(context: StreamFlowContext): + """Test saving DefaultRetagTransformer on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + await base_step_test_process( + workflow, + DefaultRetagTransformer, + {"name": utils.random_name() + "-transformer", "default_port": port}, + context, + ) + + +@pytest.mark.asyncio +async def test_cwl_token_transformer(context: StreamFlowContext): + """Test saving CWLTokenProcessor on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + step_name = utils.random_name() + step, new_workflow, new_step = await base_step_test_process( + workflow, + CWLTokenTransformer, + { + "name": step_name + "-transformer", + "port_name": "test", + "processor": CWLTokenProcessor( + name=step_name, + workflow=workflow, + ), + }, + context, + test_are_eq=False, + ) + set_val_to_attributes(step, ["persistent_id", "workflow"], None) + set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + set_val_to_attributes(step.processor, ["workflow"], None) + set_val_to_attributes(new_step.processor, ["workflow"], None) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio +async def test_cwl_conditional_step(context: StreamFlowContext): + """Test saving CWLConditionalStep on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + await base_step_test_process( + workflow, + CWLConditionalStep, + { + "name": utils.random_name() + "-when", + "expression": f"$(inputs.{utils.random_name()}.length == 1)", + "full_js": True, + }, + context, + ) + + +@pytest.mark.asyncio +async def test_cwl_transfer_step(context: StreamFlowContext): + """Test saving CWLTransferStep on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + await 
base_step_test_process( + workflow, + CWLTransferStep, + { + "name": posixpath.join(utils.random_name(), "__transfer__", "test"), + "job_port": port, + }, + context, + ) + + +@pytest.mark.asyncio +async def test_cwl_input_injector_step(context: StreamFlowContext): + """Test saving CWLInputInjectorStep on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + await base_step_test_process( + workflow, + CWLInputInjectorStep, + { + "name": utils.random_name() + "-injector", + "job_port": port, + }, + context, + ) + + +@pytest.mark.asyncio +async def test_empty_scatter_conditional_step(context: StreamFlowContext): + """Test saving CWLEmptyScatterConditionalStep on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + await base_step_test_process( + workflow, + CWLEmptyScatterConditionalStep, + { + "name": utils.random_name() + "-empty-scatter-condition", + "scatter_method": "dotproduct", + }, + context, + ) + + +@pytest.mark.asyncio +async def test_list_merge_combinator(context: StreamFlowContext): + """Test saving ListMergeCombinator on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + step, new_workflow, new_step = await base_step_test_process( + workflow, + CombinatorStep, + { + "name": utils.random_name() + "-combinator", + "combinator": ListMergeCombinator( + name=utils.random_name(), + workflow=workflow, + input_names=[port.name], + output_name=port.name, + flatten=False, + ), + }, + context, + test_are_eq=False, + ) + persistent_id_test(workflow, new_workflow, step, new_step) + + set_val_to_attributes(step, ["persistent_id", "workflow"], None) + set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + set_workflow_in_combinator(step.combinator, None) + set_workflow_in_combinator(new_step.combinator, None) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio +async def test_loop_value_from_transformer(context: StreamFlowContext): + """Test saving LoopValueFromTransformer on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + step, new_workflow, new_step = await base_step_test_process( + workflow, + LoopValueFromTransformer, + { + "name": utils.random_name() + "-loop-value-from-transformer", + "processor": CWLTokenProcessor( + name=port.name, + workflow=workflow, + ), + "port_name": port.name, + "full_js": True, + "value_from": f"$(inputs.{port.name} + 1)", + }, + context, + test_are_eq=False, + ) + persistent_id_test(workflow, new_workflow, step, new_step) + + set_val_to_attributes(step, ["persistent_id", "workflow"], None) + set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + set_val_to_attributes(step.processor, ["workflow"], None) + set_val_to_attributes(new_step.processor, ["workflow"], None) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio +async def test_cwl_loop_output_all_step(context: StreamFlowContext): + """Test saving CWLLoopOutputAllStep on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + await base_step_test_process( + workflow, + CWLLoopOutputAllStep, + { + "name": utils.random_name() + "-loop-output", + }, + context, + ) From 2163be6cff6a3eb49232bf5a6aacfdb1df1351b7 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 7 Nov 2023 18:15:32 +0100 Subject: [PATCH 08/69] added new asserts --- tests/test_change_wf.py | 16 
++++++++++++---- tests/test_cwl_change_wf.py | 5 ++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/test_change_wf.py b/tests/test_change_wf.py index 30e5324ee..2ddb86721 100644 --- a/tests/test_change_wf.py +++ b/tests/test_change_wf.py @@ -93,11 +93,13 @@ def set_val_to_attributes(elem, str_attributes: MutableSequence[str], val): if attr in attrs.keys(): setattr(elem, attr, val) +def workflow_in_combinator_test(original_combinator, new_combinator): + assert original_combinator.workflow.persistent_id != new_combinator.workflow.persistent_id + for original_inner, new_inner in zip(original_combinator.combinators.values(), new_combinator.combinators.values()): + workflow_in_combinator_test(original_inner, new_inner) def set_workflow_in_combinator(combinator, workflow): combinator.workflow = workflow - if not combinator.combinators: - return for c in combinator.combinators.values(): set_workflow_in_combinator(c, workflow) @@ -171,13 +173,15 @@ async def test_execute_step(context: StreamFlowContext): new_workflow, new_step = await clone_step(step, workflow, context) persistent_id_test(workflow, new_workflow, step, new_step) + assert step.command.step.persistent_id != new_step.command.step.persistent_id step.command.step = None new_step.command.step = None for original_processor, new_processor in zip( step.output_processors.values(), new_step.output_processors.values() ): - set_val_to_attributes(original_processor, ["persistent_id", "workflow"], None) - set_val_to_attributes(new_processor, ["persistent_id", "workflow"], None) + assert original_processor.workflow.persistent_id != new_processor.workflow.persistent_id + set_val_to_attributes(original_processor, ["workflow"], None) + set_val_to_attributes(new_processor, ["workflow"], None) set_val_to_attributes(step, ["persistent_id", "workflow"], None) set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) assert are_equals(step, new_step) @@ -210,6 +214,8 @@ async def test_schedule_step(context: StreamFlowContext): for original_filter, new_filter in zip( step.binding_config.filters, new_step.binding_config.filters ): + # Config are read-only so workflows can share the same + assert original_filter.persistent_id == new_filter.persistent_id set_val_to_attributes(original_filter, ["persistent_id", "workflow"], None) set_val_to_attributes(new_filter, ["persistent_id", "workflow"], None) set_val_to_attributes(step, ["persistent_id", "workflow"], None) @@ -258,6 +264,7 @@ async def test_combinator_step(context: StreamFlowContext, combinator: Combinato set_val_to_attributes(step, ["persistent_id", "workflow"], None) set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + workflow_in_combinator_test(step.combinator, new_step.combinator) set_workflow_in_combinator(step.combinator, None) set_workflow_in_combinator(new_step.combinator, None) assert are_equals(step, new_step) @@ -289,6 +296,7 @@ async def test_loop_combinator_step(context: StreamFlowContext): set_val_to_attributes(step, ["persistent_id", "workflow"], None) set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + workflow_in_combinator_test(step.combinator, new_step.combinator) set_workflow_in_combinator(step.combinator, None) set_workflow_in_combinator(new_step.combinator, None) assert are_equals(step, new_step) diff --git a/tests/test_cwl_change_wf.py b/tests/test_cwl_change_wf.py index 994a2a611..27165aa92 100644 --- a/tests/test_cwl_change_wf.py +++ b/tests/test_cwl_change_wf.py @@ -27,7 +27,7 @@ persistent_id_test, 
set_val_to_attributes, base_step_test_process, - set_workflow_in_combinator, + set_workflow_in_combinator, workflow_in_combinator_test, ) from tests.utils.get_instances import create_workflow @@ -77,6 +77,7 @@ async def test_cwl_token_transformer(context: StreamFlowContext): ) set_val_to_attributes(step, ["persistent_id", "workflow"], None) set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + assert step.processor.workflow.persistent_id != new_step.processor.workflow.persistent_id set_val_to_attributes(step.processor, ["workflow"], None) set_val_to_attributes(new_step.processor, ["workflow"], None) assert are_equals(step, new_step) @@ -167,6 +168,7 @@ async def test_list_merge_combinator(context: StreamFlowContext): set_val_to_attributes(step, ["persistent_id", "workflow"], None) set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + workflow_in_combinator_test(step.combinator, new_step.combinator) set_workflow_in_combinator(step.combinator, None) set_workflow_in_combinator(new_step.combinator, None) assert are_equals(step, new_step) @@ -196,6 +198,7 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): set_val_to_attributes(step, ["persistent_id", "workflow"], None) set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + assert step.processor.workflow.persistent_id != new_step.processor.workflow.persistent_id set_val_to_attributes(step.processor, ["workflow"], None) set_val_to_attributes(new_step.processor, ["workflow"], None) assert are_equals(step, new_step) From eb57f50ac0e71d1e6a68f542ae4600a98b7d77c1 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 7 Nov 2023 18:16:44 +0100 Subject: [PATCH 09/69] fix loading of TokenProcessor in ValueFromTransformer --- streamflow/cwl/transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streamflow/cwl/transformer.py b/streamflow/cwl/transformer.py index b3287a88f..819641cfd 100644 --- a/streamflow/cwl/transformer.py +++ b/streamflow/cwl/transformer.py @@ -261,7 +261,7 @@ async def _load( else await loading_context.load_workflow(context, row["workflow"]), port_name=params["port_name"], processor=await TokenProcessor.load( - context, params["processor"], loading_context + context, params["processor"], loading_context, change_wf ), value_from=params["value_from"], expression_lib=params["expression_lib"], From 5a669d3cf1832377dda1e6e4201c5fbebf255ff6 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 7 Nov 2023 18:22:50 +0100 Subject: [PATCH 10/69] fix k8s template path --- tests/utils/get_instances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/get_instances.py b/tests/utils/get_instances.py index 8b9461c74..deab97132 100644 --- a/tests/utils/get_instances.py +++ b/tests/utils/get_instances.py @@ -66,7 +66,7 @@ def get_service(_context: StreamFlowContext, deployment_t: str) -> str | None: def get_kubernetes_deployment_config(): - with open(pkg_resources.resource_filename(__name__, "pod.jinja2")) as t: + with open(pkg_resources.resource_filename(__name__, "./pod.jinja2")) as t: template = Template(t.read()) with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: template.stream(name=utils.random_name()).dump(f.name) From fc86b92313a871e49c6114905a6c0c0eaa135630 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 7 Nov 2023 18:39:45 +0100 Subject: [PATCH 11/69] test on other Steps --- tests/test_change_wf.py | 16 ++++- tests/test_cwl_change_wf.py | 127 ++++++++++++++++++++++++++++++++++-- 2 files 
changed, 133 insertions(+), 10 deletions(-) diff --git a/tests/test_change_wf.py b/tests/test_change_wf.py index 2ddb86721..e4e5b218a 100644 --- a/tests/test_change_wf.py +++ b/tests/test_change_wf.py @@ -93,11 +93,18 @@ def set_val_to_attributes(elem, str_attributes: MutableSequence[str], val): if attr in attrs.keys(): setattr(elem, attr, val) + def workflow_in_combinator_test(original_combinator, new_combinator): - assert original_combinator.workflow.persistent_id != new_combinator.workflow.persistent_id - for original_inner, new_inner in zip(original_combinator.combinators.values(), new_combinator.combinators.values()): + assert ( + original_combinator.workflow.persistent_id + != new_combinator.workflow.persistent_id + ) + for original_inner, new_inner in zip( + original_combinator.combinators.values(), new_combinator.combinators.values() + ): workflow_in_combinator_test(original_inner, new_inner) + def set_workflow_in_combinator(combinator, workflow): combinator.workflow = workflow for c in combinator.combinators.values(): @@ -179,7 +186,10 @@ async def test_execute_step(context: StreamFlowContext): for original_processor, new_processor in zip( step.output_processors.values(), new_step.output_processors.values() ): - assert original_processor.workflow.persistent_id != new_processor.workflow.persistent_id + assert ( + original_processor.workflow.persistent_id + != new_processor.workflow.persistent_id + ) set_val_to_attributes(original_processor, ["workflow"], None) set_val_to_attributes(new_processor, ["workflow"], None) set_val_to_attributes(step, ["persistent_id", "workflow"], None) diff --git a/tests/test_cwl_change_wf.py b/tests/test_cwl_change_wf.py index 27165aa92..74b89dcef 100644 --- a/tests/test_cwl_change_wf.py +++ b/tests/test_cwl_change_wf.py @@ -18,6 +18,12 @@ DefaultRetagTransformer, CWLTokenTransformer, LoopValueFromTransformer, + ValueFromTransformer, + AllNonNullTransformer, + FirstNonNullTransformer, + ForwardTransformer, + ListToElementTransformer, + OnlyNonNullTransformer, ) from streamflow.workflow.step import CombinatorStep from tests.conftest import ( @@ -27,7 +33,8 @@ persistent_id_test, set_val_to_attributes, base_step_test_process, - set_workflow_in_combinator, workflow_in_combinator_test, + set_workflow_in_combinator, + workflow_in_combinator_test, ) from tests.utils.get_instances import create_workflow @@ -56,10 +63,110 @@ async def test_default_retag_transformer(context: StreamFlowContext): ) +@pytest.mark.asyncio +async def test_value_from_transformer(context: StreamFlowContext): + """Test saving ValueFromTransformer on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + step, new_workflow, new_step = await base_step_test_process( + workflow, + ValueFromTransformer, + { + "name": utils.random_name() + "-value-from-transformer", + "processor": CWLTokenProcessor( + name=port.name, + workflow=workflow, + ), + "port_name": port.name, + "full_js": True, + "value_from": f"$(inputs.{port.name} + 1)", + }, + context, + test_are_eq=False, + ) + set_val_to_attributes(step, ["persistent_id", "workflow"], None) + set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + assert ( + step.processor.workflow.persistent_id + != new_step.processor.workflow.persistent_id + ) + set_val_to_attributes(step.processor, ["workflow"], None) + set_val_to_attributes(new_step.processor, ["workflow"], None) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio +async def 
test_all_non_null_transformer(context: StreamFlowContext): + """Test saving AllNonNullTransformer on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=1))[0] + await base_step_test_process( + workflow, + AllNonNullTransformer, + { + "name": utils.random_name() + "-transformer", + }, + context, + ) + + +@pytest.mark.asyncio +async def test_first_non_null_transformer(context: StreamFlowContext): + """Test saving FirstNonNullTransformer on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=1))[0] + await base_step_test_process( + workflow, + FirstNonNullTransformer, + { + "name": utils.random_name() + "-transformer", + }, + context, + ) + + +@pytest.mark.asyncio +async def test_forward_transformer(context: StreamFlowContext): + """Test saving ForwardTransformer on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=1))[0] + await base_step_test_process( + workflow, + ForwardTransformer, + { + "name": utils.random_name() + "-transformer", + }, + context, + ) + + +@pytest.mark.asyncio +async def test_list_to_element_transformer(context: StreamFlowContext): + """Test saving FirstNonNullTransformer on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=1))[0] + await base_step_test_process( + workflow, + ListToElementTransformer, + { + "name": utils.random_name() + "-transformer", + }, + context, + ) + + +@pytest.mark.asyncio +async def test_only_non_null_transformer(context: StreamFlowContext): + workflow = (await create_workflow(context, num_port=1))[0] + await base_step_test_process( + workflow, + OnlyNonNullTransformer, + { + "name": utils.random_name() + "-transformer", + }, + context, + ) + + @pytest.mark.asyncio async def test_cwl_token_transformer(context: StreamFlowContext): """Test saving CWLTokenProcessor on database and re-load it in a new Workflow""" - workflow, (port,) = await create_workflow(context, num_port=1) + workflow = (await create_workflow(context, num_port=1))[0] step_name = utils.random_name() step, new_workflow, new_step = await base_step_test_process( workflow, @@ -77,7 +184,10 @@ async def test_cwl_token_transformer(context: StreamFlowContext): ) set_val_to_attributes(step, ["persistent_id", "workflow"], None) set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) - assert step.processor.workflow.persistent_id != new_step.processor.workflow.persistent_id + assert ( + step.processor.workflow.persistent_id + != new_step.processor.workflow.persistent_id + ) set_val_to_attributes(step.processor, ["workflow"], None) set_val_to_attributes(new_step.processor, ["workflow"], None) assert are_equals(step, new_step) @@ -86,7 +196,7 @@ async def test_cwl_token_transformer(context: StreamFlowContext): @pytest.mark.asyncio async def test_cwl_conditional_step(context: StreamFlowContext): """Test saving CWLConditionalStep on database and re-load it in a new Workflow""" - workflow, (port,) = await create_workflow(context, num_port=1) + workflow = (await create_workflow(context, num_port=1))[0] await base_step_test_process( workflow, CWLConditionalStep, @@ -132,7 +242,7 @@ async def test_cwl_input_injector_step(context: StreamFlowContext): @pytest.mark.asyncio async def test_empty_scatter_conditional_step(context: StreamFlowContext): """Test saving CWLEmptyScatterConditionalStep on database and re-load it in a new Workflow""" - workflow, (port,) = await create_workflow(context, 
num_port=1) + workflow = (await create_workflow(context, num_port=1))[0] await base_step_test_process( workflow, CWLEmptyScatterConditionalStep, @@ -198,7 +308,10 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): set_val_to_attributes(step, ["persistent_id", "workflow"], None) set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) - assert step.processor.workflow.persistent_id != new_step.processor.workflow.persistent_id + assert ( + step.processor.workflow.persistent_id + != new_step.processor.workflow.persistent_id + ) set_val_to_attributes(step.processor, ["workflow"], None) set_val_to_attributes(new_step.processor, ["workflow"], None) assert are_equals(step, new_step) @@ -207,7 +320,7 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): @pytest.mark.asyncio async def test_cwl_loop_output_all_step(context: StreamFlowContext): """Test saving CWLLoopOutputAllStep on database and re-load it in a new Workflow""" - workflow, (port,) = await create_workflow(context, num_port=1) + workflow = (await create_workflow(context, num_port=1))[0] await base_step_test_process( workflow, CWLLoopOutputAllStep, From 220677652dab63d292c1db5f3234e8ba126adb2c Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Wed, 8 Nov 2023 00:52:37 +0100 Subject: [PATCH 12/69] minor fix skip ports loading --- streamflow/cwl/step.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/streamflow/cwl/step.py b/streamflow/cwl/step.py index e1559b627..f31649e16 100644 --- a/streamflow/cwl/step.py +++ b/streamflow/cwl/step.py @@ -29,6 +29,7 @@ LoopOutputStep, TransferStep, _get_token_ids, + _get_port, ) from streamflow.workflow.token import IterationTerminationToken, ListToken, ObjectToken @@ -153,8 +154,10 @@ async def _load( params["skip_ports"].keys(), await asyncio.gather( *( - asyncio.create_task(loading_context.load_port(context, port_id)) - for port_id in params["skip_ports"].values() + asyncio.create_task( + _get_port(context, value, loading_context, change_wf) + ) + for value in params["skip_ports"].values() ) ), ): From 49224fa5a1dae83447a532741d21003f7f71862e Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Wed, 8 Nov 2023 00:53:31 +0100 Subject: [PATCH 13/69] fix comments --- streamflow/core/utils.py | 2 +- tests/test_cwl_change_wf.py | 5 +++-- tests/utils/get_instances.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/streamflow/core/utils.py b/streamflow/core/utils.py index 0090fcdf7..58c8181c7 100644 --- a/streamflow/core/utils.py +++ b/streamflow/core/utils.py @@ -295,7 +295,7 @@ async def get_dependencies( ) return {d["name"]: p.name for d, p in zip(dependency_rows, ports)} else: - # it is not helpful to have an instance in loading_context when it is building a new workflow + # it is not helpful to have the Port instance in loading_context when it is loading on a new workflow port_rows = await asyncio.gather( *( asyncio.create_task(context.database.get_port(d["port"])) diff --git a/tests/test_cwl_change_wf.py b/tests/test_cwl_change_wf.py index 74b89dcef..dd91064d2 100644 --- a/tests/test_cwl_change_wf.py +++ b/tests/test_cwl_change_wf.py @@ -138,7 +138,7 @@ async def test_forward_transformer(context: StreamFlowContext): @pytest.mark.asyncio async def test_list_to_element_transformer(context: StreamFlowContext): - """Test saving FirstNonNullTransformer on database and re-load it in a new Workflow""" + """Test saving ListToElementTransformer on database and re-load it in a new Workflow""" workflow = (await 
create_workflow(context, num_port=1))[0] await base_step_test_process( workflow, @@ -152,6 +152,7 @@ async def test_list_to_element_transformer(context: StreamFlowContext): @pytest.mark.asyncio async def test_only_non_null_transformer(context: StreamFlowContext): + """Test saving OnlyNonNullTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] await base_step_test_process( workflow, @@ -165,7 +166,7 @@ async def test_only_non_null_transformer(context: StreamFlowContext): @pytest.mark.asyncio async def test_cwl_token_transformer(context: StreamFlowContext): - """Test saving CWLTokenProcessor on database and re-load it in a new Workflow""" + """Test saving CWLTokenTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] step_name = utils.random_name() step, new_workflow, new_step = await base_step_test_process( diff --git a/tests/utils/get_instances.py b/tests/utils/get_instances.py index deab97132..5086481b2 100644 --- a/tests/utils/get_instances.py +++ b/tests/utils/get_instances.py @@ -159,7 +159,7 @@ def create_schedule_step( deploy_steps: MutableSequence[DeployStep], binding_config: BindingConfig = None, ): - """It is necessary to pass in the correct order biding_config.targets and deploy_steps for the mapping""" + # It is necessary to pass in the correct order biding_config.targets and deploy_steps for the mapping if not binding_config: binding_config = BindingConfig( targets=[ From 014b8daa12bccaef17230da3fcad9eb3fef08f6f Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Wed, 8 Nov 2023 11:01:48 +0100 Subject: [PATCH 14/69] minor fix --- streamflow/cwl/combinator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streamflow/cwl/combinator.py b/streamflow/cwl/combinator.py index 2553ca9d9..c2a43e17a 100644 --- a/streamflow/cwl/combinator.py +++ b/streamflow/cwl/combinator.py @@ -41,7 +41,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow = None, + change_wf: Workflow, ) -> ListMergeCombinator: return cls( name=row["name"], From b01ed166f5d975f2e1616c89c8f1518278155994 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Wed, 8 Nov 2023 12:31:32 +0100 Subject: [PATCH 15/69] fix annotation --- tests/utils/get_instances.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/utils/get_instances.py b/tests/utils/get_instances.py index 5086481b2..6af3910eb 100644 --- a/tests/utils/get_instances.py +++ b/tests/utils/get_instances.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import asyncio import os import posixpath From 38f64122ff8235333c2556e9da3fe6f9af2eea31 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Wed, 8 Nov 2023 14:13:17 +0100 Subject: [PATCH 16/69] moved methods and template --- tests/utils/get_instances.py | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/tests/utils/get_instances.py b/tests/utils/get_instances.py index 6af3910eb..0e535e578 100644 --- a/tests/utils/get_instances.py +++ b/tests/utils/get_instances.py @@ -35,40 +35,8 @@ def get_docker_deployment_config(): ) -async def get_deployment_config( - _context: StreamFlowContext, deployment_t: str -) -> DeploymentConfig: - if deployment_t == "local": - return get_local_deployment_config() - elif deployment_t == "docker": - return get_docker_deployment_config() - elif deployment_t == "kubernetes": - return 
get_kubernetes_deployment_config() - elif deployment_t == "singularity": - return get_singularity_deployment_config() - elif deployment_t == "ssh": - return await get_ssh_deployment_config(_context) - else: - raise Exception(f"{deployment_t} deployment type not supported") - - -def get_service(_context: StreamFlowContext, deployment_t: str) -> str | None: - if deployment_t == "local": - return None - elif deployment_t == "docker": - return None - elif deployment_t == "kubernetes": - return "sf-test" - elif deployment_t == "singularity": - return None - elif deployment_t == "ssh": - return None - else: - raise Exception(f"{deployment_t} deployment type not supported") - - def get_kubernetes_deployment_config(): - with open(pkg_resources.resource_filename(__name__, "./pod.jinja2")) as t: + with open(pkg_resources.resource_filename(__name__, "pod.jinja2")) as t: template = Template(t.read()) with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: template.stream(name=utils.random_name()).dump(f.name) From 5f90117565366d8cf88cd57f35eab3c527205eaf Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Fri, 1 Dec 2023 01:40:01 +0100 Subject: [PATCH 17/69] merge --- streamflow/persistence/base.py | 2 +- streamflow/persistence/schemas/sqlite.sql | 15 ++++++++------- streamflow/workflow/combinator.py | 5 ++++- streamflow/workflow/step.py | 2 +- tests/test_change_wf.py | 17 +++++++---------- 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/streamflow/persistence/base.py b/streamflow/persistence/base.py index b97471584..4fca0eb9b 100644 --- a/streamflow/persistence/base.py +++ b/streamflow/persistence/base.py @@ -10,7 +10,7 @@ class CachedDatabase(Database, ABC): def __init__(self, context: StreamFlowContext): super().__init__(context) - self.config_cache: Cache = LRUCache(maxsize=sys.maxsize) + self.deployment_cache: Cache = LRUCache(maxsize=sys.maxsize) self.port_cache: Cache = LRUCache(maxsize=sys.maxsize) self.step_cache: Cache = LRUCache(maxsize=sys.maxsize) self.target_cache: Cache = LRUCache(maxsize=sys.maxsize) diff --git a/streamflow/persistence/schemas/sqlite.sql b/streamflow/persistence/schemas/sqlite.sql index 6a432f278..e3cd991e4 100644 --- a/streamflow/persistence/schemas/sqlite.sql +++ b/streamflow/persistence/schemas/sqlite.sql @@ -77,14 +77,15 @@ CREATE TABLE IF NOT EXISTS provenance ); -CREATE TABLE IF NOT EXISTS config +CREATE TABLE IF NOT EXISTS deployment ( - id INTEGER PRIMARY KEY, - name TEXT, - attr_type TEXT, - config TEXT, - type TEXT, - params TEXT + id INTEGER PRIMARY KEY, + name TEXT, + type TEXT, + config TEXT, + external INTEGER, + lazy INTEGER, + workdir TEXT ); diff --git a/streamflow/workflow/combinator.py b/streamflow/workflow/combinator.py index a67b7e8b5..95388b405 100644 --- a/streamflow/workflow/combinator.py +++ b/streamflow/workflow/combinator.py @@ -220,10 +220,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, + change_wf: Workflow, ) -> LoopTerminationCombinator: combinator = cls( name=row["name"], - workflow=await loading_context.load_workflow(context, row["workflow"]), + workflow=change_wf + if change_wf + else await loading_context.load_workflow(context, row["workflow"]), ) for item in row["output_items"]: combinator.add_output_item(item) diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index b56395693..1b15368fb 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -483,7 +483,7 @@ async def _load( workflow=change_wf 
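# A minimal sketch of how the change_wf parameter threaded through the _load methods
# in this series is meant to be used: when a workflow is passed explicitly, the loaded
# entity is bound to it instead of the workflow recorded in the database, and neither
# its persistent_id nor the loading_context caches are populated. new_workflow and
# loading_context are illustrative; Workflow and Step.load come from streamflow.core.workflow.
new_workflow = Workflow(
    context=context, type="cwl", name=utils.random_name(), config={}
)
await new_workflow.save(context)
cloned_step = await Step.load(
    context, step.persistent_id, loading_context, change_wf=new_workflow
)
# cloned_step belongs to new_workflow and its persistent_id has not been set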
if change_wf else await loading_context.load_workflow(context, row["workflow"]), - deployment_config=await loading_context.load_config( + deployment_config=await loading_context.load_deployment( context, params["deployment_config"] ), connector_port=cast( diff --git a/tests/test_change_wf.py b/tests/test_change_wf.py index e4e5b218a..78f72d6b8 100644 --- a/tests/test_change_wf.py +++ b/tests/test_change_wf.py @@ -3,9 +3,9 @@ import pytest from streamflow.core import utils -from streamflow.core.config import BindingConfig, Config +from streamflow.core.config import BindingConfig from streamflow.core.context import StreamFlowContext -from streamflow.core.deployment import LocalTarget +from streamflow.core.deployment import LocalTarget, FilterConfig from streamflow.core.workflow import Workflow, Port, Step from streamflow.cwl.command import CWLCommand, CWLCommandToken from streamflow.cwl.translator import _create_command_output_processor_base @@ -202,18 +202,15 @@ async def test_schedule_step(context: StreamFlowContext): """Test saving ScheduleStep on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=0))[0] deploy_step = create_deploy_step(workflow) + nof_deployments = 2 step = create_schedule_step( workflow, - [deploy_step, deploy_step], + [deploy_step for _ in range(nof_deployments)], BindingConfig( - targets=[LocalTarget(), LocalTarget()], + targets=[LocalTarget() for _ in range(nof_deployments)], filters=[ - Config( - config={"hello": "world"}, name=utils.random_name(), type="shuffle" - ), - Config( - config={"ciao": "mondo"}, name=utils.random_name(), type="linear" - ), + FilterConfig(config={}, name=utils.random_name(), type="shuffle") + for _ in range(nof_deployments) ], ), ) From 465eaaa015cd4350fa628a17c10f91c0ecf8d7d3 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Fri, 1 Dec 2023 12:18:18 +0100 Subject: [PATCH 18/69] fix tests --- tests/test_change_wf.py | 2 +- tests/test_cwl_change_wf.py | 2 +- tests/test_cwl_provenance.py | 167 ++++++++++-------------------- tests/test_provenance.py | 4 +- tests/utils/get_instances.py | 190 ----------------------------------- tests/utils/workflow.py | 41 +++++++- 6 files changed, 98 insertions(+), 308 deletions(-) delete mode 100644 tests/utils/get_instances.py diff --git a/tests/test_change_wf.py b/tests/test_change_wf.py index 78f72d6b8..830e32376 100644 --- a/tests/test_change_wf.py +++ b/tests/test_change_wf.py @@ -24,7 +24,7 @@ are_equals, object_to_dict, ) -from tests.utils.get_instances import ( +from tests.utils.workflow import ( create_workflow, create_schedule_step, create_deploy_step, diff --git a/tests/test_cwl_change_wf.py b/tests/test_cwl_change_wf.py index dd91064d2..ede43654c 100644 --- a/tests/test_cwl_change_wf.py +++ b/tests/test_cwl_change_wf.py @@ -36,7 +36,7 @@ set_workflow_in_combinator, workflow_in_combinator_test, ) -from tests.utils.get_instances import create_workflow +from tests.utils.workflow import create_workflow @pytest.mark.asyncio diff --git a/tests/test_cwl_provenance.py b/tests/test_cwl_provenance.py index 20b05440f..4cfb284e5 100644 --- a/tests/test_cwl_provenance.py +++ b/tests/test_cwl_provenance.py @@ -13,9 +13,7 @@ CWLConditionalStep, CWLEmptyScatterConditionalStep, CWLInputInjectorStep, - CWLLoopConditionalStep, CWLLoopOutputAllStep, - CWLLoopOutputLastStep, CWLTransferStep, ) from streamflow.cwl.transformer import ( @@ -38,12 +36,14 @@ from streamflow.workflow.step import CombinatorStep from streamflow.workflow.token import IterationTerminationToken, 
ListToken from tests.test_provenance import ( - create_deploy_step, - create_schedule_step, + general_test, + put_tokens, + verify_dependency_tokens, +) +from tests.utils.get_instances import ( create_workflow, - _general_test, - _put_tokens, - _verify_dependency_tokens, + create_schedule_step, + create_deploy_step, ) @@ -52,7 +52,7 @@ async def test_default_transformer(context: StreamFlowContext): """Test token provenance for DefaultTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [Token("a")] - await _general_test( + await general_test( context=context, workflow=workflow, in_port=in_port, @@ -67,7 +67,7 @@ async def test_default_transformer(context: StreamFlowContext): # len(token_list) = N output tokens + 1 termination token assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -80,7 +80,7 @@ async def test_default_retag_transformer(context: StreamFlowContext): """Test token provenance for DefaultRetagTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [Token("a")] - await _general_test( + await general_test( context=context, workflow=workflow, in_port=in_port, @@ -93,7 +93,7 @@ async def test_default_retag_transformer(context: StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -108,7 +108,7 @@ async def test_cwl_token_transformer(context: StreamFlowContext): port_name = "test" step_name = utils.random_name() token_list = [Token("a")] - await _general_test( + await general_test( context=context, workflow=workflow, in_port=in_port, @@ -126,7 +126,7 @@ async def test_cwl_token_transformer(context: StreamFlowContext): port_name=port_name, ) assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -140,7 +140,7 @@ async def test_value_from_transformer(context: StreamFlowContext): workflow, (in_port, out_port) = await create_workflow(context) port_name = "test" token_list = [Token(10)] - await _general_test( + await general_test( context=context, workflow=workflow, in_port=in_port, @@ -160,7 +160,7 @@ async def test_value_from_transformer(context: StreamFlowContext): port_name=port_name, ) assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -173,7 +173,7 @@ async def test_all_non_null_transformer(context: StreamFlowContext): """Test token provenance for AllNonNullTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [ListToken([Token("a"), Token(None), Token("b")])] - await _general_test( + await general_test( context=context, workflow=workflow, in_port=in_port, @@ -185,7 +185,7 @@ async def test_all_non_null_transformer(context: StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -198,7 +198,7 @@ async def test_first_non_null_transformer(context: StreamFlowContext): """Test token provenance for FirstNonNullTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = 
[ListToken([Token(None), Token("a")])] - await _general_test( + await general_test( context=context, workflow=workflow, in_port=in_port, @@ -210,7 +210,7 @@ async def test_first_non_null_transformer(context: StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -223,7 +223,7 @@ async def test_forward_transformer(context: StreamFlowContext): """Test token provenance for ForwardTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [ListToken([Token("a")])] - await _general_test( + await general_test( context=context, workflow=workflow, in_port=in_port, @@ -235,7 +235,7 @@ async def test_forward_transformer(context: StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -248,7 +248,7 @@ async def test_list_to_element_transformer(context: StreamFlowContext): """Test token provenance for ListToElementTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [ListToken([Token("a")])] - await _general_test( + await general_test( context=context, workflow=workflow, in_port=in_port, @@ -260,7 +260,7 @@ async def test_list_to_element_transformer(context: StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -273,7 +273,7 @@ async def test_only_non_null_transformer(context: StreamFlowContext): """Test token provenance for OnlyNonNullTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [ListToken([Token(None), Token("a")])] - await _general_test( + await general_test( context=context, workflow=workflow, in_port=in_port, @@ -285,7 +285,7 @@ async def test_only_non_null_transformer(context: StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -299,7 +299,7 @@ async def test_cwl_conditional_step(context: StreamFlowContext): workflow, (in_port, out_port) = await create_workflow(context) port_name = "test" token_list = [ListToken([Token("a")])] - await _general_test( + await general_test( context=context, workflow=workflow, in_port=in_port, @@ -314,64 +314,7 @@ async def test_cwl_conditional_step(context: StreamFlowContext): port_name=port_name, ) assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( - token=out_port.token_list[0], - port=out_port, - context=context, - expected_dependee=token_list, - ) - - -@pytest.mark.asyncio -async def test_cwl_empty_scatter_conditional_step(context: StreamFlowContext): - """Test token provenance for CWLEmptyScatterConditionalStep""" - workflow, (in_port, out_port) = await create_workflow(context) - port_name = "test" - token_list = [ListToken([Token("a")])] - await _general_test( - context=context, - workflow=workflow, - in_port=in_port, - out_port=out_port, - step_cls=CWLEmptyScatterConditionalStep, - kwargs_step={ - "name": utils.random_name() + "-empty-scatter-condition", - "scatter_method": "dotproduct", - }, - token_list=token_list, - port_name=port_name, - ) - assert len(out_port.token_list) == 2 - await 
_verify_dependency_tokens( - token=out_port.token_list[0], - port=out_port, - context=context, - expected_dependee=token_list, - ) - - -@pytest.mark.asyncio -async def test_cwl_loop_conditional_step(context: StreamFlowContext): - """Test token provenance for CWLLoopConditionalStep""" - workflow, (in_port, out_port) = await create_workflow(context) - port_name = "test" - token_list = [ListToken([Token("a")])] - await _general_test( - context=context, - workflow=workflow, - in_port=in_port, - out_port=out_port, - step_cls=CWLLoopConditionalStep, - kwargs_step={ - "name": utils.random_name() + "-when", - "expression": f"$(inputs.{port_name}.length == 1)", - "full_js": True, - }, - token_list=token_list, - port_name=port_name, - ) - assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -384,10 +327,10 @@ async def test_transfer_step(context: StreamFlowContext): """Test token provenance for CWLTransferStep""" workflow, (in_port, out_port) = await create_workflow(context) deploy_step = create_deploy_step(workflow) - schedule_step = create_schedule_step(workflow, deploy_step) + schedule_step = create_schedule_step(workflow, [deploy_step]) port_name = "test" token_list = [Token("a")] - transfer_step = await _general_test( + transfer_step = await general_test( context=context, workflow=workflow, in_port=in_port, @@ -404,7 +347,7 @@ async def test_transfer_step(context: StreamFlowContext): await context.scheduler.notify_status(job_token.value.name, Status.COMPLETED) token_list.append(job_token) assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -417,9 +360,9 @@ async def test_cwl_input_injector_step(context: StreamFlowContext): """Test token provenance for CWLInputInjectorStep""" workflow, (in_port, out_port) = await create_workflow(context) deploy_step = create_deploy_step(workflow) - schedule_step = create_schedule_step(workflow, deploy_step) + schedule_step = create_schedule_step(workflow, [deploy_step]) token_list = [Token("a")] - injector = await _general_test( + injector = await general_test( context=context, workflow=workflow, in_port=in_port, @@ -430,13 +373,12 @@ async def test_cwl_input_injector_step(context: StreamFlowContext): "job_port": schedule_step.get_output_port(), }, token_list=token_list, - save_input_token=True, ) job_token = injector.get_input_port("__job__").token_list[0] await context.scheduler.notify_status(job_token.value.name, Status.COMPLETED) token_list.append(job_token) assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -449,7 +391,7 @@ async def test_empty_scatter_conditional_step(context: StreamFlowContext): """Test token provenance for CWLEmptyScatterConditionalStep""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [ListToken([Token(i), Token(i * 100)]) for i in range(1, 5)] - await _general_test( + await general_test( context=context, workflow=workflow, in_port=in_port, @@ -464,7 +406,7 @@ async def test_empty_scatter_conditional_step(context: StreamFlowContext): assert len(out_port.token_list) == 5 for in_token, out_token in zip(in_port.token_list[:-1], out_port.token_list[:-1]): - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_token, port=out_port, 
context=context, @@ -494,7 +436,7 @@ async def test_list_merge_combinator(context: StreamFlowContext): step.add_output_port(port_name, out_port) list_token = [ListToken([Token("a"), Token("b")])] - await _put_tokens(list_token, in_port, context) + await put_tokens(list_token, in_port, context) step.combinator.add_item(port_name) await workflow.save(context) @@ -502,7 +444,7 @@ async def test_list_merge_combinator(context: StreamFlowContext): await executor.run() assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -534,15 +476,15 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): transformer.add_output_port(port_name, out_port) token_list = [Token(10)] - await _put_tokens(token_list, in_port, context) - await _put_tokens(token_list, loop_port, context) + await put_tokens(token_list, in_port, context) + await put_tokens(token_list, loop_port, context) await workflow.save(context) executor = StreamFlowExecutor(workflow) await executor.run() assert len(transformer.get_output_port(port_name).token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -551,34 +493,35 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): @pytest.mark.asyncio -@pytest.mark.parametrize("step_cls", [CWLLoopOutputAllStep, CWLLoopOutputLastStep]) -async def test_cwl_loop_output(context: StreamFlowContext, step_cls): - """Test token provenance for CWLLoopOutput""" +async def test_cwl_loop_output_all_step(context: StreamFlowContext): + """Test token provenance for CWLLoopOutputAllStep""" workflow, (in_port, out_port) = await create_workflow(context) step = workflow.create_step( - cls=step_cls, + cls=CWLLoopOutputAllStep, name=posixpath.join(utils.random_name(), "-loop-output"), ) port_name = "test" step.add_input_port(port_name, in_port) step.add_output_port(port_name, out_port) - token_list = [ - Token("b", tag="0.1"), - IterationTerminationToken("0.1"), + tag = "0.1" + list_token = [ + ListToken([Token("a"), Token("b")], tag=tag), + IterationTerminationToken(tag), ] - await _put_tokens(token_list, in_port, context) + + await put_tokens(list_token, in_port, context) await workflow.save(context) executor = StreamFlowExecutor(workflow) await executor.run() assert len(out_port.token_list) == 2 - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, - expected_dependee=[token_list[0]], + expected_dependee=[list_token[0]], ) @@ -619,13 +562,13 @@ async def test_nested_crossproduct_combinator(context: StreamFlowContext): ListToken([Token("a"), Token("b")], tag="0.0"), ListToken([Token("c"), Token("d")], tag="0.1"), ] - await _put_tokens(list_token_1, in_port_1, context) + await put_tokens(list_token_1, in_port_1, context) list_token_2 = [ ListToken([Token("1"), Token("2")], tag="0.0"), ListToken([Token("3"), Token("4")], tag="0.1"), ] - await _put_tokens(list_token_2, in_port_2, context) + await put_tokens(list_token_2, in_port_2, context) await workflow.save(context) executor = StreamFlowExecutor(workflow) @@ -695,7 +638,7 @@ async def test_nested_crossproduct_combinator(context: StreamFlowContext): # check port_1 outputs assert len(out_port_1.token_list) == 5 for i, out_token in enumerate(out_port_1.token_list[:-1]): - await _verify_dependency_tokens( + await verify_dependency_tokens( 
token=out_token, port=out_port_1, context=context, @@ -706,7 +649,7 @@ async def test_nested_crossproduct_combinator(context: StreamFlowContext): # check port_2 outputs assert len(out_port_2.token_list) == 5 for i, out_token in enumerate(out_port_2.token_list[:-1]): - await _verify_dependency_tokens( + await verify_dependency_tokens( token=out_token, port=out_port_2, context=context, diff --git a/tests/test_provenance.py b/tests/test_provenance.py index 7b8d6606a..f023893bb 100644 --- a/tests/test_provenance.py +++ b/tests/test_provenance.py @@ -212,7 +212,7 @@ async def test_schedule_step(context: StreamFlowContext): """Test token provenance for ScheduleStep""" workflow = (await create_workflow(context, num_port=0))[0] deploy_step = create_deploy_step(workflow) - schedule_step = create_schedule_step(workflow, deploy_step) + schedule_step = create_schedule_step(workflow, [deploy_step]) await workflow.save(context) executor = StreamFlowExecutor(workflow) @@ -241,7 +241,7 @@ async def test_execute_step(context: StreamFlowContext): context, num_port=3 ) deploy_step = create_deploy_step(workflow) - schedule_step = create_schedule_step(workflow, deploy_step) + schedule_step = create_schedule_step(workflow, [deploy_step]) in_port_name = "in-1" out_port_name = "out-1" diff --git a/tests/utils/get_instances.py b/tests/utils/get_instances.py deleted file mode 100644 index 0e535e578..000000000 --- a/tests/utils/get_instances.py +++ /dev/null @@ -1,190 +0,0 @@ -from __future__ import annotations - -import asyncio -import os -import posixpath -import tempfile -from jinja2 import Template -from typing import MutableSequence, cast - -import asyncssh -import asyncssh.public_key -import pkg_resources - -from streamflow.core import utils -from streamflow.core.config import BindingConfig -from streamflow.core.context import StreamFlowContext -from streamflow.core.deployment import Target, DeploymentConfig, LOCAL_LOCATION -from streamflow.core.workflow import Workflow, Port -from streamflow.workflow.combinator import ( - DotProductCombinator, - LoopTerminationCombinator, - CartesianProductCombinator, -) -from streamflow.workflow.port import ConnectorPort -from streamflow.workflow.step import DeployStep, ScheduleStep - - -def get_docker_deployment_config(): - return DeploymentConfig( - name="alpine-docker", - type="docker", - config={"image": "alpine:3.16.2"}, - external=False, - lazy=False, - ) - - -def get_kubernetes_deployment_config(): - with open(pkg_resources.resource_filename(__name__, "pod.jinja2")) as t: - template = Template(t.read()) - with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: - template.stream(name=utils.random_name()).dump(f.name) - return DeploymentConfig( - name="alpine-kubernetes", - type="kubernetes", - config={"files": [f.name]}, - external=False, - lazy=False, - ) - - -def get_singularity_deployment_config(): - return DeploymentConfig( - name="alpine-singularity", - type="singularity", - config={"image": "docker://alpine:3.16.2"}, - external=False, - lazy=False, - ) - - -async def get_ssh_deployment_config(_context: StreamFlowContext): - skey = asyncssh.public_key.generate_private_key( - alg_name="ssh-rsa", - comment="streamflow-test", - key_size=4096, - ) - public_key = skey.export_public_key().decode("utf-8") - with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: - skey.write_private_key(f.name) - docker_config = DeploymentConfig( - name="linuxserver-ssh-docker", - type="docker", - config={ - "image": "lscr.io/linuxserver/openssh-server", - "env": 
[f"PUBLIC_KEY={public_key}"], - "init": False, - "publish": ["2222:2222"], - }, - external=False, - lazy=False, - ) - await _context.deployment_manager.deploy(docker_config) - await asyncio.sleep(5) - return DeploymentConfig( - name="linuxserver-ssh", - type="ssh", - config={ - "nodes": [ - { - "checkHostKey": False, - "hostname": "127.0.0.1:2222", - "sshKey": f.name, - "username": "linuxserver.io", - } - ], - "maxConcurrentSessions": 10, - }, - external=False, - lazy=False, - ) - - -def get_local_deployment_config(): - return DeploymentConfig( - name=LOCAL_LOCATION, - type="local", - config={}, - external=True, - lazy=False, - workdir=os.path.realpath(tempfile.gettempdir()), - ) - - -def create_deploy_step(workflow, deployment_config=None): - connector_port = workflow.create_port(cls=ConnectorPort) - if not deployment_config: - deployment_config = get_docker_deployment_config() - return workflow.create_step( - cls=DeployStep, - name=posixpath.join("__deploy__", deployment_config.name), - deployment_config=deployment_config, - connector_port=connector_port, - ) - - -def create_schedule_step( - workflow: Workflow, - deploy_steps: MutableSequence[DeployStep], - binding_config: BindingConfig = None, -): - # It is necessary to pass in the correct order biding_config.targets and deploy_steps for the mapping - if not binding_config: - binding_config = BindingConfig( - targets=[ - Target( - deployment=deploy_step.deployment_config, - workdir=utils.random_name(), - ) - for deploy_step in deploy_steps - ] - ) - return workflow.create_step( - cls=ScheduleStep, - name=posixpath.join(utils.random_name(), "__schedule__"), - job_prefix="something", - connector_ports={ - target.deployment.name: deploy_step.get_output_port() - for target, deploy_step in zip(binding_config.targets, deploy_steps) - }, - binding_config=binding_config, - ) - - -async def create_workflow( - context: StreamFlowContext, num_port: int = 2 -) -> tuple[Workflow, tuple[Port]]: - workflow = Workflow( - context=context, type="cwl", name=utils.random_name(), config={} - ) - ports = [] - for _ in range(num_port): - ports.append(workflow.create_port()) - await workflow.save(context) - return workflow, tuple(cast(MutableSequence[Port], ports)) - - -def get_dot_combinator(): - return DotProductCombinator(name=utils.random_name(), workflow=None) - - -def get_cartesian_product_combinator(): - return CartesianProductCombinator(name=utils.random_name(), workflow=None) - - -def get_loop_terminator_combinator(): - c = LoopTerminationCombinator(name=utils.random_name(), workflow=None) - c.add_output_item("test1") - c.add_output_item("test2") - return c - - -def get_nested_crossproduct(): - combinator = DotProductCombinator(name=utils.random_name(), workflow=None) - c1 = CartesianProductCombinator(name=utils.random_name(), workflow=None) - c1.add_item("ext") - c1.add_item("inn") - items = c1.get_items(False) - combinator.add_combinator(c1, items) - return combinator diff --git a/tests/utils/workflow.py b/tests/utils/workflow.py index a9f8e1638..d571c381d 100644 --- a/tests/utils/workflow.py +++ b/tests/utils/workflow.py @@ -6,6 +6,11 @@ from streamflow.core import utils from streamflow.core.deployment import Target from streamflow.core.config import BindingConfig +from streamflow.workflow.combinator import ( + DotProductCombinator, + CartesianProductCombinator, + LoopTerminationCombinator, +) from streamflow.workflow.port import ConnectorPort from streamflow.core.workflow import Workflow, Port from streamflow.workflow.step import DeployStep, 
ScheduleStep @@ -27,7 +32,12 @@ def create_deploy_step(workflow, deployment_config=None): ) -def create_schedule_step(workflow, deploy_step, binding_config=None): +def create_schedule_step( + workflow: Workflow, + deploy_steps: MutableSequence[DeployStep], + binding_config: BindingConfig = None, +): + # It is necessary to pass in the correct order biding_config.targets and deploy_steps for the mapping if not binding_config: binding_config = BindingConfig( targets=[ @@ -35,6 +45,7 @@ def create_schedule_step(workflow, deploy_step, binding_config=None): deployment=deploy_step.deployment_config, workdir=utils.random_name(), ) + for deploy_step in deploy_steps ] ) return workflow.create_step( @@ -42,7 +53,8 @@ def create_schedule_step(workflow, deploy_step, binding_config=None): name=posixpath.join(utils.random_name(), "__schedule__"), job_prefix="something", connector_ports={ - binding_config.targets[0].deployment.name: deploy_step.get_output_port() + target.deployment.name: deploy_step.get_output_port() + for target, deploy_step in zip(binding_config.targets, deploy_steps) }, binding_config=binding_config, ) @@ -59,3 +71,28 @@ async def create_workflow( ports.append(workflow.create_port()) await workflow.save(context) return workflow, tuple(cast(MutableSequence[Port], ports)) + + +def get_dot_combinator(): + return DotProductCombinator(name=utils.random_name(), workflow=None) + + +def get_cartesian_product_combinator(): + return CartesianProductCombinator(name=utils.random_name(), workflow=None) + + +def get_loop_terminator_combinator(): + c = LoopTerminationCombinator(name=utils.random_name(), workflow=None) + c.add_output_item("test1") + c.add_output_item("test2") + return c + + +def get_nested_crossproduct(): + combinator = DotProductCombinator(name=utils.random_name(), workflow=None) + c1 = CartesianProductCombinator(name=utils.random_name(), workflow=None) + c1.add_item("ext") + c1.add_item("inn") + items = c1.get_items(False) + combinator.add_combinator(c1, items) + return combinator From d2a7f0ec7298504f428946c65012a4c851351bd8 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sun, 3 Dec 2023 22:16:55 +0100 Subject: [PATCH 19/69] minor fix --- streamflow/core/utils.py | 3 ++- streamflow/cwl/processor.py | 2 +- streamflow/workflow/step.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/streamflow/core/utils.py b/streamflow/core/utils.py index 58c8181c7..246c15f5b 100644 --- a/streamflow/core/utils.py +++ b/streamflow/core/utils.py @@ -286,6 +286,8 @@ async def get_dependencies( context: StreamFlowContext, loading_context: DatabaseLoadingContext, ): + # This method is generally called from the step load method. 
If the change_wf is enabled, + # it is not helpful to get the Port instance in loading_context if load_ports: ports = await asyncio.gather( *( @@ -295,7 +297,6 @@ async def get_dependencies( ) return {d["name"]: p.name for d, p in zip(dependency_rows, ports)} else: - # it is not helpful to have the Port instance in loading_context when it is loading on a new workflow port_rows = await asyncio.gather( *( asyncio.create_task(context.database.get_port(d["port"])) diff --git a/streamflow/cwl/processor.py b/streamflow/cwl/processor.py index 19a58c566..5f6a3fc1c 100644 --- a/streamflow/cwl/processor.py +++ b/streamflow/cwl/processor.py @@ -166,7 +166,7 @@ async def _load( format_graph.parse(data=row["format_graph"]) if row["format_graph"] is not None else None - ), # todo: fix multiple instance + ), # todo: fix multiple instances full_js=row["full_js"], load_contents=row["load_contents"], load_listing=LoadListing(row["load_listing"]) diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index 1b15368fb..8c7e8c257 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -58,8 +58,8 @@ async def _get_port( ): if change_wf: port_row = await context.database.get_port(port_id) - if port_row["name"] in change_wf.ports.keys(): - return change_wf.ports[port_row["name"]] + if port := change_wf.ports.get(port_row["name"]): + return port # If the port is not available in the new workflow, a new one must be created return await Port.load(context, port_id, loading_context, change_wf) From e4dd0d26e9191d3eebc58d7877b8ce87693d4899 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Thu, 4 Jan 2024 00:06:00 +0100 Subject: [PATCH 20/69] renamed parameter name --- streamflow/core/utils.py | 3 +- streamflow/core/workflow.py | 50 +++++++++--------- streamflow/cwl/combinator.py | 6 +-- streamflow/cwl/processor.py | 60 ++++++++++----------- streamflow/cwl/step.py | 14 ++--- streamflow/cwl/transformer.py | 22 ++++---- streamflow/workflow/combinator.py | 12 ++--- streamflow/workflow/step.py | 88 ++++++++++++++----------------- tests/test_cwl_provenance.py | 84 ++++++++++++++--------------- 9 files changed, 163 insertions(+), 176 deletions(-) diff --git a/streamflow/core/utils.py b/streamflow/core/utils.py index 246c15f5b..c1dacd159 100644 --- a/streamflow/core/utils.py +++ b/streamflow/core/utils.py @@ -286,7 +286,8 @@ async def get_dependencies( context: StreamFlowContext, loading_context: DatabaseLoadingContext, ): - # This method is generally called from the step load method. If the change_wf is enabled, + # This method is generally called from the step load method. 
+ # If the workflow parameter in the load method has a value, the load_ports is False because # it is not helpful to get the Port instance in loading_context if load_ports: ports = await asyncio.gather( diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index 4ce37c070..91ae72df0 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -103,12 +103,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] @@ -130,12 +130,12 @@ async def load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow = None, + workflow: Workflow = None, ) -> CommandOutputProcessor: type = cast( Type[CommandOutputProcessor], utils.get_class_from_name(row["type"]) ) - return await type._load(context, row["params"], loading_context, change_wf) + return await type._load(context, row["params"], loading_context, workflow) @abstractmethod async def process( @@ -264,12 +264,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> Port: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), ) @@ -314,12 +314,12 @@ async def load( context: StreamFlowContext, persistent_id: int, loading_context: DatabaseLoadingContext, - change_wf: Workflow = None, + workflow: Workflow = None, ) -> Port: row = await context.database.get_port(persistent_id) type = cast(Type[Port], utils.get_class_from_name(row["type"])) - port = await type._load(context, row, loading_context, change_wf) - if not change_wf: + port = await type._load(context, row, loading_context, workflow) + if not workflow: port.persistent_id = persistent_id loading_context.add_port(persistent_id, port) return port @@ -374,12 +374,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ): return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), ) @@ -441,12 +441,12 @@ async def load( context: StreamFlowContext, persistent_id: int, loading_context: DatabaseLoadingContext, - change_wf: Workflow = None, + workflow: Workflow = None, ) -> Step: row = await context.database.get_step(persistent_id) type = cast(Type[Step], utils.get_class_from_name(row["type"])) - step = await type._load(context, row, loading_context, change_wf) - if not change_wf: + step = await type._load(context, row, loading_context, workflow) + if not workflow: step.persistent_id = persistent_id step.status = Status(row["status"]) step.terminated = step.status in [ @@ -456,13 +456,13 @@ async def load( ] input_deps = await context.database.get_input_ports(persistent_id) step.input_ports = await get_dependencies( - input_deps, change_wf is None, context, loading_context + input_deps, workflow is None, context, loading_context ) output_deps = await 
context.database.get_output_ports(persistent_id) step.output_ports = await get_dependencies( - output_deps, change_wf is None, context, loading_context + output_deps, workflow is None, context, loading_context ) - if not change_wf: + if not workflow: loading_context.add_step(persistent_id, step) return step @@ -593,12 +593,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> TokenProcessor: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), ) @@ -611,10 +611,10 @@ async def load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow = None, + workflow: Workflow = None, ): type = cast(Type[TokenProcessor], utils.get_class_from_name(row["type"])) - return await type._load(context, row["params"], loading_context, change_wf) + return await type._load(context, row["params"], loading_context, workflow) @abstractmethod async def process(self, inputs: MutableMapping[str, Token], token: Token) -> Token: diff --git a/streamflow/cwl/combinator.py b/streamflow/cwl/combinator.py index c2a43e17a..398317ed8 100644 --- a/streamflow/cwl/combinator.py +++ b/streamflow/cwl/combinator.py @@ -41,12 +41,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> ListMergeCombinator: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), input_names=row["input_names"], output_name=row["output_name"], diff --git a/streamflow/cwl/processor.py b/streamflow/cwl/processor.py index 5f6a3fc1c..6c616ddbd 100644 --- a/streamflow/cwl/processor.py +++ b/streamflow/cwl/processor.py @@ -149,13 +149,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CWLTokenProcessor: format_graph = Graph() return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), token_type=row["token_type"], check_type=row["check_type"], @@ -393,12 +393,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] @@ -734,15 +734,15 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CWLMapTokenProcessor: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), processor=await TokenProcessor.load( - context, row["processor"], loading_context, change_wf + context, row["processor"], loading_context, workflow ), optional=row["optional"], ) @@ -799,15 +799,15 @@ async def _load( context: StreamFlowContext, 
row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), processor=await CommandOutputProcessor.load( - context, row["processor"], loading_context, change_wf + context, row["processor"], loading_context, workflow ), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] @@ -875,12 +875,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CWLObjectTokenProcessor: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), processors={ k: v @@ -890,7 +890,7 @@ async def _load( *( asyncio.create_task( TokenProcessor.load( - context, v, loading_context, change_wf + context, v, loading_context, workflow ) ) for v in row["processors"].values() @@ -980,13 +980,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CommandOutputProcessor: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] @@ -999,7 +999,7 @@ async def _load( *( asyncio.create_task( CommandOutputProcessor.load( - context, v, loading_context, change_wf + context, v, loading_context, workflow ) ) for v in row["processors"].values() @@ -1167,19 +1167,19 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CWLUnionTokenProcessor: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), processors=cast( MutableSequence[TokenProcessor], await asyncio.gather( *( asyncio.create_task( - TokenProcessor.load(context, p, loading_context, change_wf) + TokenProcessor.load(context, p, loading_context, workflow) ) for p in row["processors"] ) @@ -1287,12 +1287,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), processors=cast( MutableSequence[CommandOutputProcessor], @@ -1300,7 +1300,7 @@ async def _load( *( asyncio.create_task( CommandOutputProcessor.load( - context, p, loading_context, change_wf + context, p, loading_context, workflow ) ) for p in row["processors"] diff --git a/streamflow/cwl/step.py b/streamflow/cwl/step.py index f31649e16..c9166735f 100644 --- a/streamflow/cwl/step.py +++ b/streamflow/cwl/step.py @@ -138,13 +138,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CWLConditionalStep: params = json.loads(row["params"]) step 
= cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), expression=params["expression"], expression_lib=params["expression_lib"], @@ -155,7 +155,7 @@ async def _load( await asyncio.gather( *( asyncio.create_task( - _get_port(context, value, loading_context, change_wf) + _get_port(context, value, loading_context, workflow) ) for value in params["skip_ports"].values() ) @@ -224,13 +224,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), scatter_method=params["scatter_method"], ) diff --git a/streamflow/cwl/transformer.py b/streamflow/cwl/transformer.py index 819641cfd..ac10ac292 100644 --- a/streamflow/cwl/transformer.py +++ b/streamflow/cwl/transformer.py @@ -46,13 +46,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), default_port=await loading_context.load_port( context, params["default_port"] @@ -119,17 +119,17 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), port_name=params["port_name"], processor=await TokenProcessor.load( - context, params["processor"], loading_context, change_wf + context, params["processor"], loading_context, workflow ), ) @@ -251,17 +251,17 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), port_name=params["port_name"], processor=await TokenProcessor.load( - context, params["processor"], loading_context, change_wf + context, params["processor"], loading_context, workflow ), value_from=params["value_from"], expression_lib=params["expression_lib"], diff --git a/streamflow/workflow/combinator.py b/streamflow/workflow/combinator.py index 95388b405..2d808bb35 100644 --- a/streamflow/workflow/combinator.py +++ b/streamflow/workflow/combinator.py @@ -26,12 +26,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CartesianProductCombinator: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), depth=row["depth"], ) @@ -220,12 +220,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, 
+ workflow: Workflow, ) -> LoopTerminationCombinator: combinator = cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), ) for item in row["output_items"]: diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index 8c7e8c257..248748151 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -54,15 +54,15 @@ async def _get_port( context: StreamFlowContext, port_id: int, loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ): - if change_wf: + if workflow: port_row = await context.database.get_port(port_id) - if port := change_wf.ports.get(port_row["name"]): + if port := workflow.ports.get(port_row["name"]): return port # If the port is not available in the new workflow, a new one must be created - return await Port.load(context, port_id, loading_context, change_wf) + return await Port.load(context, port_id, loading_context, workflow) return await loading_context.load_port(context, port_id) @@ -171,12 +171,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> Combinator: return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), ) @@ -233,18 +233,16 @@ async def load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow = None, + workflow: Workflow = None, ) -> Combinator: type = cast(Combinator, utils.get_class_from_name(row["type"])) - combinator = await type._load( - context, row["params"], loading_context, change_wf - ) + combinator = await type._load(context, row["params"], loading_context, workflow) combinator.items = row["params"]["items"] combinator.combinators_map = row["params"]["combinators_map"] combinator.combinators = {} for k, c in row["params"]["combinators"].items(): combinator.combinators[k] = await Combinator.load( - context, c, loading_context, change_wf + context, c, loading_context, workflow ) return combinator @@ -304,16 +302,16 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> CombinatorStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), combinator=await Combinator.load( - context, params["combinator"], loading_context, change_wf + context, params["combinator"], loading_context, workflow ), ) @@ -475,13 +473,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> DeployStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), deployment_config=await loading_context.load_deployment( context, params["deployment_config"] @@ -489,7 +487,7 @@ async def _load( connector_port=cast( ConnectorPort, await _get_port( - context, params["connector_port"], loading_context, change_wf + context, params["connector_port"], loading_context, workflow ), ), ) @@ -591,19 +589,17 @@ async 
def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> ExecuteStep: params = json.loads(row["params"]) step = cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), job_port=cast( JobPort, - await _get_port( - context, params["job_port"], loading_context, change_wf - ), + await _get_port(context, params["job_port"], loading_context, workflow), ), ) step.output_connectors = params["output_connectors"] @@ -615,7 +611,7 @@ async def _load( *( asyncio.create_task( CommandOutputProcessor.load( - context, p, loading_context, change_wf + context, p, loading_context, workflow ) ) for p in params["output_processors"].values() @@ -875,13 +871,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> GatherStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), depth=params["depth"], ) @@ -963,19 +959,17 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> InputInjectorStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), job_port=cast( JobPort, - await _get_port( - context, params["job_port"], loading_context, change_wf - ), + await _get_port(context, params["job_port"], loading_context, workflow), ), ) @@ -1266,7 +1260,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> ScheduleStep: params = json.loads(row["params"]) if hardware_requirement := params.get("hardware_requirement"): @@ -1275,8 +1269,8 @@ async def _load( ) return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), binding_config=await BindingConfig.load( context, params["binding_config"], loading_context @@ -1284,15 +1278,13 @@ async def _load( connector_ports={ k: cast( ConnectorPort, - await _get_port(context, v, loading_context, change_wf), + await _get_port(context, v, loading_context, workflow), ) for k, v in params["connector_ports"].items() }, job_port=cast( JobPort, - await _get_port( - context, params["job_port"], loading_context, change_wf - ), + await _get_port(context, params["job_port"], loading_context, workflow), ), job_prefix=params["job_prefix"], hardware_requirement=hardware_requirement, @@ -1562,19 +1554,17 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - change_wf: Workflow, + workflow: Workflow, ) -> TransferStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=change_wf - if change_wf + workflow=workflow + if workflow else await loading_context.load_workflow(context, row["workflow"]), job_port=cast( JobPort, - await _get_port( - context, params["job_port"], loading_context, change_wf - ), + await _get_port(context, params["job_port"], loading_context, 
workflow), ), ) diff --git a/tests/test_cwl_provenance.py b/tests/test_cwl_provenance.py index 4cfb284e5..20eacb21f 100644 --- a/tests/test_cwl_provenance.py +++ b/tests/test_cwl_provenance.py @@ -35,15 +35,11 @@ from streamflow.workflow.executor import StreamFlowExecutor from streamflow.workflow.step import CombinatorStep from streamflow.workflow.token import IterationTerminationToken, ListToken -from tests.test_provenance import ( - general_test, - put_tokens, - verify_dependency_tokens, -) -from tests.utils.get_instances import ( +from tests.test_provenance import _general_test, _verify_dependency_tokens, _put_tokens +from tests.utils.workflow import ( create_workflow, - create_schedule_step, create_deploy_step, + create_schedule_step, ) @@ -52,7 +48,7 @@ async def test_default_transformer(context: StreamFlowContext): """Test token provenance for DefaultTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [Token("a")] - await general_test( + await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -67,7 +63,7 @@ async def test_default_transformer(context: StreamFlowContext): # len(token_list) = N output tokens + 1 termination token assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -80,7 +76,7 @@ async def test_default_retag_transformer(context: StreamFlowContext): """Test token provenance for DefaultRetagTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [Token("a")] - await general_test( + await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -93,7 +89,7 @@ async def test_default_retag_transformer(context: StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -108,7 +104,7 @@ async def test_cwl_token_transformer(context: StreamFlowContext): port_name = "test" step_name = utils.random_name() token_list = [Token("a")] - await general_test( + await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -126,7 +122,7 @@ async def test_cwl_token_transformer(context: StreamFlowContext): port_name=port_name, ) assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -140,7 +136,7 @@ async def test_value_from_transformer(context: StreamFlowContext): workflow, (in_port, out_port) = await create_workflow(context) port_name = "test" token_list = [Token(10)] - await general_test( + await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -160,7 +156,7 @@ async def test_value_from_transformer(context: StreamFlowContext): port_name=port_name, ) assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -173,7 +169,7 @@ async def test_all_non_null_transformer(context: StreamFlowContext): """Test token provenance for AllNonNullTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [ListToken([Token("a"), Token(None), Token("b")])] - await general_test( + await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -185,7 +181,7 @@ async def test_all_non_null_transformer(context: 
StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -198,7 +194,7 @@ async def test_first_non_null_transformer(context: StreamFlowContext): """Test token provenance for FirstNonNullTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [ListToken([Token(None), Token("a")])] - await general_test( + await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -210,7 +206,7 @@ async def test_first_non_null_transformer(context: StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -223,7 +219,7 @@ async def test_forward_transformer(context: StreamFlowContext): """Test token provenance for ForwardTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [ListToken([Token("a")])] - await general_test( + await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -235,7 +231,7 @@ async def test_forward_transformer(context: StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -248,7 +244,7 @@ async def test_list_to_element_transformer(context: StreamFlowContext): """Test token provenance for ListToElementTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [ListToken([Token("a")])] - await general_test( + await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -260,7 +256,7 @@ async def test_list_to_element_transformer(context: StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -273,7 +269,7 @@ async def test_only_non_null_transformer(context: StreamFlowContext): """Test token provenance for OnlyNonNullTransformer""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [ListToken([Token(None), Token("a")])] - await general_test( + await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -285,7 +281,7 @@ async def test_only_non_null_transformer(context: StreamFlowContext): token_list=token_list, ) assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -299,7 +295,7 @@ async def test_cwl_conditional_step(context: StreamFlowContext): workflow, (in_port, out_port) = await create_workflow(context) port_name = "test" token_list = [ListToken([Token("a")])] - await general_test( + await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -314,7 +310,7 @@ async def test_cwl_conditional_step(context: StreamFlowContext): port_name=port_name, ) assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -330,7 +326,7 @@ async def test_transfer_step(context: StreamFlowContext): schedule_step = create_schedule_step(workflow, [deploy_step]) port_name = "test" token_list = [Token("a")] - transfer_step = await general_test( + 
transfer_step = await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -347,7 +343,7 @@ async def test_transfer_step(context: StreamFlowContext): await context.scheduler.notify_status(job_token.value.name, Status.COMPLETED) token_list.append(job_token) assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -362,7 +358,7 @@ async def test_cwl_input_injector_step(context: StreamFlowContext): deploy_step = create_deploy_step(workflow) schedule_step = create_schedule_step(workflow, [deploy_step]) token_list = [Token("a")] - injector = await general_test( + injector = await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -378,7 +374,7 @@ async def test_cwl_input_injector_step(context: StreamFlowContext): await context.scheduler.notify_status(job_token.value.name, Status.COMPLETED) token_list.append(job_token) assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -391,7 +387,7 @@ async def test_empty_scatter_conditional_step(context: StreamFlowContext): """Test token provenance for CWLEmptyScatterConditionalStep""" workflow, (in_port, out_port) = await create_workflow(context) token_list = [ListToken([Token(i), Token(i * 100)]) for i in range(1, 5)] - await general_test( + await _general_test( context=context, workflow=workflow, in_port=in_port, @@ -406,7 +402,7 @@ async def test_empty_scatter_conditional_step(context: StreamFlowContext): assert len(out_port.token_list) == 5 for in_token, out_token in zip(in_port.token_list[:-1], out_port.token_list[:-1]): - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_token, port=out_port, context=context, @@ -436,7 +432,7 @@ async def test_list_merge_combinator(context: StreamFlowContext): step.add_output_port(port_name, out_port) list_token = [ListToken([Token("a"), Token("b")])] - await put_tokens(list_token, in_port, context) + await _put_tokens(list_token, in_port, context) step.combinator.add_item(port_name) await workflow.save(context) @@ -444,7 +440,7 @@ async def test_list_merge_combinator(context: StreamFlowContext): await executor.run() assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -476,15 +472,15 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): transformer.add_output_port(port_name, out_port) token_list = [Token(10)] - await put_tokens(token_list, in_port, context) - await put_tokens(token_list, loop_port, context) + await _put_tokens(token_list, in_port, context) + await _put_tokens(token_list, loop_port, context) await workflow.save(context) executor = StreamFlowExecutor(workflow) await executor.run() assert len(transformer.get_output_port(port_name).token_list) == 2 - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -510,14 +506,14 @@ async def test_cwl_loop_output_all_step(context: StreamFlowContext): IterationTerminationToken(tag), ] - await put_tokens(list_token, in_port, context) + await _put_tokens(list_token, in_port, context) await workflow.save(context) executor = StreamFlowExecutor(workflow) await executor.run() assert len(out_port.token_list) == 2 - await verify_dependency_tokens( + await 
_verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, @@ -562,13 +558,13 @@ async def test_nested_crossproduct_combinator(context: StreamFlowContext): ListToken([Token("a"), Token("b")], tag="0.0"), ListToken([Token("c"), Token("d")], tag="0.1"), ] - await put_tokens(list_token_1, in_port_1, context) + await _put_tokens(list_token_1, in_port_1, context) list_token_2 = [ ListToken([Token("1"), Token("2")], tag="0.0"), ListToken([Token("3"), Token("4")], tag="0.1"), ] - await put_tokens(list_token_2, in_port_2, context) + await _put_tokens(list_token_2, in_port_2, context) await workflow.save(context) executor = StreamFlowExecutor(workflow) @@ -638,7 +634,7 @@ async def test_nested_crossproduct_combinator(context: StreamFlowContext): # check port_1 outputs assert len(out_port_1.token_list) == 5 for i, out_token in enumerate(out_port_1.token_list[:-1]): - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_token, port=out_port_1, context=context, @@ -649,7 +645,7 @@ async def test_nested_crossproduct_combinator(context: StreamFlowContext): # check port_2 outputs assert len(out_port_2.token_list) == 5 for i, out_token in enumerate(out_port_2.token_list[:-1]): - await verify_dependency_tokens( + await _verify_dependency_tokens( token=out_token, port=out_port_2, context=context, From 006fed414807795297cdf3c718f4051ac178b111 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Thu, 4 Jan 2024 00:13:34 +0100 Subject: [PATCH 21/69] fix --- tests/test_cwl_provenance.py | 81 ++++++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 12 deletions(-) diff --git a/tests/test_cwl_provenance.py b/tests/test_cwl_provenance.py index 20eacb21f..c928a949f 100644 --- a/tests/test_cwl_provenance.py +++ b/tests/test_cwl_provenance.py @@ -15,6 +15,8 @@ CWLInputInjectorStep, CWLLoopOutputAllStep, CWLTransferStep, + CWLLoopConditionalStep, + CWLLoopOutputLastStep, ) from streamflow.cwl.transformer import ( AllNonNullTransformer, @@ -319,7 +321,64 @@ async def test_cwl_conditional_step(context: StreamFlowContext): @pytest.mark.asyncio -async def test_transfer_step(context: StreamFlowContext): +async def test_cwl_empty_scatter_conditional_step(context: StreamFlowContext): + """Test token provenance for CWLEmptyScatterConditionalStep""" + workflow, (in_port, out_port) = await create_workflow(context) + port_name = "test" + token_list = [ListToken([Token("a")])] + await _general_test( + context=context, + workflow=workflow, + in_port=in_port, + out_port=out_port, + step_cls=CWLEmptyScatterConditionalStep, + kwargs_step={ + "name": utils.random_name() + "-empty-scatter-condition", + "scatter_method": "dotproduct", + }, + token_list=token_list, + port_name=port_name, + ) + assert len(out_port.token_list) == 2 + await _verify_dependency_tokens( + token=out_port.token_list[0], + port=out_port, + context=context, + expected_dependee=token_list, + ) + + +@pytest.mark.asyncio +async def test_cwl_loop_conditional_step(context: StreamFlowContext): + """Test token provenance for CWLLoopConditionalStep""" + workflow, (in_port, out_port) = await create_workflow(context) + port_name = "test" + token_list = [ListToken([Token("a")])] + await _general_test( + context=context, + workflow=workflow, + in_port=in_port, + out_port=out_port, + step_cls=CWLLoopConditionalStep, + kwargs_step={ + "name": utils.random_name() + "-when", + "expression": f"$(inputs.{port_name}.length == 1)", + "full_js": True, + }, + token_list=token_list, + port_name=port_name, + ) + assert 
len(out_port.token_list) == 2 + await _verify_dependency_tokens( + token=out_port.token_list[0], + port=out_port, + context=context, + expected_dependee=token_list, + ) + + +@pytest.mark.asyncio +async def test_cwl_transfer_step(context: StreamFlowContext): """Test token provenance for CWLTransferStep""" workflow, (in_port, out_port) = await create_workflow(context) deploy_step = create_deploy_step(workflow) @@ -489,35 +548,33 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): @pytest.mark.asyncio -async def test_cwl_loop_output_all_step(context: StreamFlowContext): - """Test token provenance for CWLLoopOutputAllStep""" +@pytest.mark.parametrize("step_cls", [CWLLoopOutputAllStep, CWLLoopOutputLastStep]) +async def test_cwl_loop_output(context: StreamFlowContext, step_cls): + """Test token provenance for CWLLoopOutput""" workflow, (in_port, out_port) = await create_workflow(context) step = workflow.create_step( - cls=CWLLoopOutputAllStep, + cls=step_cls, name=posixpath.join(utils.random_name(), "-loop-output"), ) port_name = "test" step.add_input_port(port_name, in_port) step.add_output_port(port_name, out_port) - tag = "0.1" - list_token = [ - ListToken([Token("a"), Token("b")], tag=tag), - IterationTerminationToken(tag), + token_list = [ + Token("b", tag="0.1"), + IterationTerminationToken("0.1"), ] - - await _put_tokens(list_token, in_port, context) + await _put_tokens(token_list, in_port, context) await workflow.save(context) executor = StreamFlowExecutor(workflow) await executor.run() - assert len(out_port.token_list) == 2 await _verify_dependency_tokens( token=out_port.token_list[0], port=out_port, context=context, - expected_dependee=[list_token[0]], + expected_dependee=[token_list[0]], ) From 6c0a93e9c65cf8e67bb577b4f3a9665e0730f1a6 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Thu, 4 Jan 2024 00:22:23 +0100 Subject: [PATCH 22/69] change signature method from public to private --- tests/test_change_wf.py | 100 ++++++++++++++++++------------------ tests/test_cwl_change_wf.py | 80 ++++++++++++++--------------- 2 files changed, 90 insertions(+), 90 deletions(-) diff --git a/tests/test_change_wf.py b/tests/test_change_wf.py index 830e32376..b050319ab 100644 --- a/tests/test_change_wf.py +++ b/tests/test_change_wf.py @@ -35,21 +35,21 @@ ) -async def base_step_test_process( +async def _base_step_test_process( workflow, step_cls, kwargs_step, context, test_are_eq=True ): step = workflow.create_step(cls=step_cls, **kwargs_step) await workflow.save(context) - new_workflow, new_step = await clone_step(step, workflow, context) - persistent_id_test(workflow, new_workflow, step, new_step) + new_workflow, new_step = await _clone_step(step, workflow, context) + _persistent_id_test(workflow, new_workflow, step, new_step) if test_are_eq: - set_val_to_attributes(step, ["persistent_id", "workflow"], None) - set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) assert are_equals(step, new_step) return step, new_workflow, new_step -def persistent_id_test(original_workflow, new_workflow, original_elem, new_elem): +def _persistent_id_test(original_workflow, new_workflow, original_elem, new_elem): assert original_workflow.persistent_id assert new_workflow.persistent_id assert original_workflow.persistent_id != new_workflow.persistent_id @@ -61,7 +61,7 @@ def persistent_id_test(original_workflow, new_workflow, original_elem, 
new_elem) assert new_elem.workflow.persistent_id == new_workflow.persistent_id -async def general_test_port(context: StreamFlowContext, cls_port: Type[Port]): +async def _general_test_port(context: StreamFlowContext, cls_port: Type[Port]): workflow = Workflow( context=context, type="cwl", name=utils.random_name(), config={} ) @@ -79,7 +79,7 @@ async def general_test_port(context: StreamFlowContext, cls_port: Type[Port]): ) new_workflow.add_port(new_port) await new_workflow.save(context) - persistent_id_test(workflow, new_workflow, port, new_port) + _persistent_id_test(workflow, new_workflow, port, new_port) port.persistent_id = None new_port.persistent_id = None port.workflow = None @@ -87,14 +87,14 @@ async def general_test_port(context: StreamFlowContext, cls_port: Type[Port]): assert are_equals(port, new_port) -def set_val_to_attributes(elem, str_attributes: MutableSequence[str], val): +def _set_val_to_attributes(elem, str_attributes: MutableSequence[str], val): attrs = object_to_dict(elem) for attr in str_attributes: if attr in attrs.keys(): setattr(elem, attr, val) -def workflow_in_combinator_test(original_combinator, new_combinator): +def _workflow_in_combinator_test(original_combinator, new_combinator): assert ( original_combinator.workflow.persistent_id != new_combinator.workflow.persistent_id @@ -102,16 +102,16 @@ def workflow_in_combinator_test(original_combinator, new_combinator): for original_inner, new_inner in zip( original_combinator.combinators.values(), new_combinator.combinators.values() ): - workflow_in_combinator_test(original_inner, new_inner) + _workflow_in_combinator_test(original_inner, new_inner) -def set_workflow_in_combinator(combinator, workflow): +def _set_workflow_in_combinator(combinator, workflow): combinator.workflow = workflow for c in combinator.combinators.values(): - set_workflow_in_combinator(c, workflow) + _set_workflow_in_combinator(c, workflow) -async def clone_step(step, workflow, context): +async def _clone_step(step, workflow, context): new_workflow = Workflow( context=context, type="cwl", name=utils.random_name(), config={} ) @@ -133,19 +133,19 @@ async def clone_step(step, workflow, context): @pytest.mark.asyncio async def test_port(context: StreamFlowContext): """Test saving Port on database and re-load it in a new Workflow""" - await general_test_port(context, Port) + await _general_test_port(context, Port) @pytest.mark.asyncio async def test_job_port(context: StreamFlowContext): """Test saving JobPort on database and re-load it in a new Workflow""" - await general_test_port(context, JobPort) + await _general_test_port(context, JobPort) @pytest.mark.asyncio async def test_connection_port(context: StreamFlowContext): """Test saving ConnectorPort on database and re-load it in a new Workflow""" - await general_test_port(context, ConnectorPort) + await _general_test_port(context, ConnectorPort) @pytest.mark.asyncio @@ -177,8 +177,8 @@ async def test_execute_step(context: StreamFlowContext): ) step.add_input_port(in_port_name, in_port) await workflow.save(context) - new_workflow, new_step = await clone_step(step, workflow, context) - persistent_id_test(workflow, new_workflow, step, new_step) + new_workflow, new_step = await _clone_step(step, workflow, context) + _persistent_id_test(workflow, new_workflow, step, new_step) assert step.command.step.persistent_id != new_step.command.step.persistent_id step.command.step = None @@ -190,10 +190,10 @@ async def test_execute_step(context: StreamFlowContext): original_processor.workflow.persistent_id != 
new_processor.workflow.persistent_id ) - set_val_to_attributes(original_processor, ["workflow"], None) - set_val_to_attributes(new_processor, ["workflow"], None) - set_val_to_attributes(step, ["persistent_id", "workflow"], None) - set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(original_processor, ["workflow"], None) + _set_val_to_attributes(new_processor, ["workflow"], None) + _set_val_to_attributes(step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) assert are_equals(step, new_step) @@ -215,18 +215,18 @@ async def test_schedule_step(context: StreamFlowContext): ), ) await workflow.save(context) - new_workflow, new_step = await clone_step(step, workflow, context) - persistent_id_test(workflow, new_workflow, step, new_step) + new_workflow, new_step = await _clone_step(step, workflow, context) + _persistent_id_test(workflow, new_workflow, step, new_step) for original_filter, new_filter in zip( step.binding_config.filters, new_step.binding_config.filters ): # Config are read-only so workflows can share the same assert original_filter.persistent_id == new_filter.persistent_id - set_val_to_attributes(original_filter, ["persistent_id", "workflow"], None) - set_val_to_attributes(new_filter, ["persistent_id", "workflow"], None) - set_val_to_attributes(step, ["persistent_id", "workflow"], None) - set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(original_filter, ["persistent_id", "workflow"], None) + _set_val_to_attributes(new_filter, ["persistent_id", "workflow"], None) + _set_val_to_attributes(step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) assert are_equals(step, new_step) @@ -251,7 +251,7 @@ async def test_combinator_step(context: StreamFlowContext, combinator: Combinato workflow, (in_port, out_port, in_port_2, out_port_2) = await create_workflow( context, num_port=4 ) - set_workflow_in_combinator(combinator, workflow) + _set_workflow_in_combinator(combinator, workflow) step = workflow.create_step( cls=CombinatorStep, name=utils.random_name() + "-combinator", @@ -266,14 +266,14 @@ async def test_combinator_step(context: StreamFlowContext, combinator: Combinato step.add_output_port(port_name_2, out_port_2) await workflow.save(context) - new_workflow, new_step = await clone_step(step, workflow, context) - persistent_id_test(workflow, new_workflow, step, new_step) - - set_val_to_attributes(step, ["persistent_id", "workflow"], None) - set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) - workflow_in_combinator_test(step.combinator, new_step.combinator) - set_workflow_in_combinator(step.combinator, None) - set_workflow_in_combinator(new_step.combinator, None) + new_workflow, new_step = await _clone_step(step, workflow, context) + _persistent_id_test(workflow, new_workflow, step, new_step) + + _set_val_to_attributes(step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _workflow_in_combinator_test(step.combinator, new_step.combinator) + _set_workflow_in_combinator(step.combinator, None) + _set_workflow_in_combinator(new_step.combinator, None) assert are_equals(step, new_step) @@ -298,14 +298,14 @@ async def test_loop_combinator_step(context: StreamFlowContext): step.add_output_port(port_name_2, out_port_2) await workflow.save(context) - new_workflow, new_step = await clone_step(step, workflow, context) - 
persistent_id_test(workflow, new_workflow, step, new_step) - - set_val_to_attributes(step, ["persistent_id", "workflow"], None) - set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) - workflow_in_combinator_test(step.combinator, new_step.combinator) - set_workflow_in_combinator(step.combinator, None) - set_workflow_in_combinator(new_step.combinator, None) + new_workflow, new_step = await _clone_step(step, workflow, context) + _persistent_id_test(workflow, new_workflow, step, new_step) + + _set_val_to_attributes(step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _workflow_in_combinator_test(step.combinator, new_step.combinator) + _set_workflow_in_combinator(step.combinator, None) + _set_workflow_in_combinator(new_step.combinator, None) assert are_equals(step, new_step) @@ -315,15 +315,15 @@ async def test_deploy_step(context: StreamFlowContext): workflow = (await create_workflow(context, num_port=0))[0] step = create_deploy_step(workflow) await workflow.save(context) - new_workflow, new_step = await clone_step(step, workflow, context) - persistent_id_test(workflow, new_workflow, step, new_step) + new_workflow, new_step = await _clone_step(step, workflow, context) + _persistent_id_test(workflow, new_workflow, step, new_step) @pytest.mark.asyncio async def test_gather_step(context: StreamFlowContext): """Test saving GatherStep on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=0))[0] - await base_step_test_process( + await _base_step_test_process( workflow, GatherStep, {"name": utils.random_name() + "-gather", "depth": 1}, @@ -335,6 +335,6 @@ async def test_gather_step(context: StreamFlowContext): async def test_scatter_step(context: StreamFlowContext): """Test saving ScatterStep on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=0))[0] - await base_step_test_process( + await _base_step_test_process( workflow, ScatterStep, {"name": utils.random_name() + "-scatter"}, context ) diff --git a/tests/test_cwl_change_wf.py b/tests/test_cwl_change_wf.py index ede43654c..f79309a6b 100644 --- a/tests/test_cwl_change_wf.py +++ b/tests/test_cwl_change_wf.py @@ -30,11 +30,11 @@ are_equals, ) from tests.test_change_wf import ( - persistent_id_test, - set_val_to_attributes, - base_step_test_process, - set_workflow_in_combinator, - workflow_in_combinator_test, + _persistent_id_test, + _set_val_to_attributes, + _base_step_test_process, + _set_workflow_in_combinator, + _workflow_in_combinator_test, ) from tests.utils.workflow import create_workflow @@ -43,7 +43,7 @@ async def test_default_transformer(context: StreamFlowContext): """Test saving DefaultTransformer on database and re-load it in a new Workflow""" workflow, (port,) = await create_workflow(context, num_port=1) - await base_step_test_process( + await _base_step_test_process( workflow, DefaultTransformer, {"name": utils.random_name() + "-transformer", "default_port": port}, @@ -55,7 +55,7 @@ async def test_default_transformer(context: StreamFlowContext): async def test_default_retag_transformer(context: StreamFlowContext): """Test saving DefaultRetagTransformer on database and re-load it in a new Workflow""" workflow, (port,) = await create_workflow(context, num_port=1) - await base_step_test_process( + await _base_step_test_process( workflow, DefaultRetagTransformer, {"name": utils.random_name() + "-transformer", "default_port": port}, @@ -67,7 +67,7 @@ async def 
test_default_retag_transformer(context: StreamFlowContext): async def test_value_from_transformer(context: StreamFlowContext): """Test saving ValueFromTransformer on database and re-load it in a new Workflow""" workflow, (port,) = await create_workflow(context, num_port=1) - step, new_workflow, new_step = await base_step_test_process( + step, new_workflow, new_step = await _base_step_test_process( workflow, ValueFromTransformer, { @@ -83,14 +83,14 @@ async def test_value_from_transformer(context: StreamFlowContext): context, test_are_eq=False, ) - set_val_to_attributes(step, ["persistent_id", "workflow"], None) - set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) assert ( step.processor.workflow.persistent_id != new_step.processor.workflow.persistent_id ) - set_val_to_attributes(step.processor, ["workflow"], None) - set_val_to_attributes(new_step.processor, ["workflow"], None) + _set_val_to_attributes(step.processor, ["workflow"], None) + _set_val_to_attributes(new_step.processor, ["workflow"], None) assert are_equals(step, new_step) @@ -98,7 +98,7 @@ async def test_value_from_transformer(context: StreamFlowContext): async def test_all_non_null_transformer(context: StreamFlowContext): """Test saving AllNonNullTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] - await base_step_test_process( + await _base_step_test_process( workflow, AllNonNullTransformer, { @@ -112,7 +112,7 @@ async def test_all_non_null_transformer(context: StreamFlowContext): async def test_first_non_null_transformer(context: StreamFlowContext): """Test saving FirstNonNullTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] - await base_step_test_process( + await _base_step_test_process( workflow, FirstNonNullTransformer, { @@ -126,7 +126,7 @@ async def test_first_non_null_transformer(context: StreamFlowContext): async def test_forward_transformer(context: StreamFlowContext): """Test saving ForwardTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] - await base_step_test_process( + await _base_step_test_process( workflow, ForwardTransformer, { @@ -140,7 +140,7 @@ async def test_forward_transformer(context: StreamFlowContext): async def test_list_to_element_transformer(context: StreamFlowContext): """Test saving ListToElementTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] - await base_step_test_process( + await _base_step_test_process( workflow, ListToElementTransformer, { @@ -154,7 +154,7 @@ async def test_list_to_element_transformer(context: StreamFlowContext): async def test_only_non_null_transformer(context: StreamFlowContext): """Test saving OnlyNonNullTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] - await base_step_test_process( + await _base_step_test_process( workflow, OnlyNonNullTransformer, { @@ -169,7 +169,7 @@ async def test_cwl_token_transformer(context: StreamFlowContext): """Test saving CWLTokenTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] step_name = utils.random_name() - step, new_workflow, new_step = await base_step_test_process( + step, 
new_workflow, new_step = await _base_step_test_process( workflow, CWLTokenTransformer, { @@ -183,14 +183,14 @@ async def test_cwl_token_transformer(context: StreamFlowContext): context, test_are_eq=False, ) - set_val_to_attributes(step, ["persistent_id", "workflow"], None) - set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) assert ( step.processor.workflow.persistent_id != new_step.processor.workflow.persistent_id ) - set_val_to_attributes(step.processor, ["workflow"], None) - set_val_to_attributes(new_step.processor, ["workflow"], None) + _set_val_to_attributes(step.processor, ["workflow"], None) + _set_val_to_attributes(new_step.processor, ["workflow"], None) assert are_equals(step, new_step) @@ -198,7 +198,7 @@ async def test_cwl_token_transformer(context: StreamFlowContext): async def test_cwl_conditional_step(context: StreamFlowContext): """Test saving CWLConditionalStep on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] - await base_step_test_process( + await _base_step_test_process( workflow, CWLConditionalStep, { @@ -214,7 +214,7 @@ async def test_cwl_conditional_step(context: StreamFlowContext): async def test_cwl_transfer_step(context: StreamFlowContext): """Test saving CWLTransferStep on database and re-load it in a new Workflow""" workflow, (port,) = await create_workflow(context, num_port=1) - await base_step_test_process( + await _base_step_test_process( workflow, CWLTransferStep, { @@ -229,7 +229,7 @@ async def test_cwl_transfer_step(context: StreamFlowContext): async def test_cwl_input_injector_step(context: StreamFlowContext): """Test saving CWLInputInjectorStep on database and re-load it in a new Workflow""" workflow, (port,) = await create_workflow(context, num_port=1) - await base_step_test_process( + await _base_step_test_process( workflow, CWLInputInjectorStep, { @@ -244,7 +244,7 @@ async def test_cwl_input_injector_step(context: StreamFlowContext): async def test_empty_scatter_conditional_step(context: StreamFlowContext): """Test saving CWLEmptyScatterConditionalStep on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] - await base_step_test_process( + await _base_step_test_process( workflow, CWLEmptyScatterConditionalStep, { @@ -259,7 +259,7 @@ async def test_empty_scatter_conditional_step(context: StreamFlowContext): async def test_list_merge_combinator(context: StreamFlowContext): """Test saving ListMergeCombinator on database and re-load it in a new Workflow""" workflow, (port,) = await create_workflow(context, num_port=1) - step, new_workflow, new_step = await base_step_test_process( + step, new_workflow, new_step = await _base_step_test_process( workflow, CombinatorStep, { @@ -275,13 +275,13 @@ async def test_list_merge_combinator(context: StreamFlowContext): context, test_are_eq=False, ) - persistent_id_test(workflow, new_workflow, step, new_step) + _persistent_id_test(workflow, new_workflow, step, new_step) - set_val_to_attributes(step, ["persistent_id", "workflow"], None) - set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) - workflow_in_combinator_test(step.combinator, new_step.combinator) - set_workflow_in_combinator(step.combinator, None) - set_workflow_in_combinator(new_step.combinator, None) + _set_val_to_attributes(step, ["persistent_id", "workflow"], None) + 
_set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _workflow_in_combinator_test(step.combinator, new_step.combinator) + _set_workflow_in_combinator(step.combinator, None) + _set_workflow_in_combinator(new_step.combinator, None) assert are_equals(step, new_step) @@ -289,7 +289,7 @@ async def test_list_merge_combinator(context: StreamFlowContext): async def test_loop_value_from_transformer(context: StreamFlowContext): """Test saving LoopValueFromTransformer on database and re-load it in a new Workflow""" workflow, (port,) = await create_workflow(context, num_port=1) - step, new_workflow, new_step = await base_step_test_process( + step, new_workflow, new_step = await _base_step_test_process( workflow, LoopValueFromTransformer, { @@ -305,16 +305,16 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): context, test_are_eq=False, ) - persistent_id_test(workflow, new_workflow, step, new_step) + _persistent_id_test(workflow, new_workflow, step, new_step) - set_val_to_attributes(step, ["persistent_id", "workflow"], None) - set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(step, ["persistent_id", "workflow"], None) + _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) assert ( step.processor.workflow.persistent_id != new_step.processor.workflow.persistent_id ) - set_val_to_attributes(step.processor, ["workflow"], None) - set_val_to_attributes(new_step.processor, ["workflow"], None) + _set_val_to_attributes(step.processor, ["workflow"], None) + _set_val_to_attributes(new_step.processor, ["workflow"], None) assert are_equals(step, new_step) @@ -322,7 +322,7 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): async def test_cwl_loop_output_all_step(context: StreamFlowContext): """Test saving CWLLoopOutputAllStep on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] - await base_step_test_process( + await _base_step_test_process( workflow, CWLLoopOutputAllStep, { From bf5715edd04125d65ae8b8b1ef030a79655c57e2 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Thu, 4 Jan 2024 01:57:18 +0100 Subject: [PATCH 23/69] change parameter type --- streamflow/core/workflow.py | 16 ++++++++-------- streamflow/cwl/combinator.py | 2 +- streamflow/cwl/processor.py | 16 ++++++++-------- streamflow/cwl/step.py | 4 ++-- streamflow/cwl/transformer.py | 6 +++--- streamflow/workflow/combinator.py | 4 ++-- streamflow/workflow/step.py | 20 ++++++++++---------- 7 files changed, 34 insertions(+), 34 deletions(-) diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index 91ae72df0..a144aa793 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -103,7 +103,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CommandOutputProcessor: return cls( name=row["name"], @@ -130,7 +130,7 @@ async def load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow = None, + workflow: Workflow | None = None, ) -> CommandOutputProcessor: type = cast( Type[CommandOutputProcessor], utils.get_class_from_name(row["type"]) @@ -264,7 +264,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> Port: return cls( 
name=row["name"], @@ -314,7 +314,7 @@ async def load( context: StreamFlowContext, persistent_id: int, loading_context: DatabaseLoadingContext, - workflow: Workflow = None, + workflow: Workflow | None = None, ) -> Port: row = await context.database.get_port(persistent_id) type = cast(Type[Port], utils.get_class_from_name(row["type"])) @@ -374,7 +374,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ): return cls( name=row["name"], @@ -441,7 +441,7 @@ async def load( context: StreamFlowContext, persistent_id: int, loading_context: DatabaseLoadingContext, - workflow: Workflow = None, + workflow: Workflow | None = None, ) -> Step: row = await context.database.get_step(persistent_id) type = cast(Type[Step], utils.get_class_from_name(row["type"])) @@ -593,7 +593,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> TokenProcessor: return cls( name=row["name"], @@ -611,7 +611,7 @@ async def load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow = None, + workflow: Workflow | None = None, ): type = cast(Type[TokenProcessor], utils.get_class_from_name(row["type"])) return await type._load(context, row["params"], loading_context, workflow) diff --git a/streamflow/cwl/combinator.py b/streamflow/cwl/combinator.py index 398317ed8..bdb1ecd2c 100644 --- a/streamflow/cwl/combinator.py +++ b/streamflow/cwl/combinator.py @@ -41,7 +41,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> ListMergeCombinator: return cls( name=row["name"], diff --git a/streamflow/cwl/processor.py b/streamflow/cwl/processor.py index 6c616ddbd..b9ff5803b 100644 --- a/streamflow/cwl/processor.py +++ b/streamflow/cwl/processor.py @@ -149,7 +149,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CWLTokenProcessor: format_graph = Graph() return cls( @@ -393,7 +393,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CommandOutputProcessor: return cls( name=row["name"], @@ -734,7 +734,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CWLMapTokenProcessor: return cls( name=row["name"], @@ -799,7 +799,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CommandOutputProcessor: return cls( name=row["name"], @@ -875,7 +875,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CWLObjectTokenProcessor: return cls( name=row["name"], @@ -980,7 +980,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CommandOutputProcessor: params = json.loads(row["params"]) return 
cls( @@ -1167,7 +1167,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CWLUnionTokenProcessor: return cls( name=row["name"], @@ -1287,7 +1287,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CommandOutputProcessor: return cls( name=row["name"], diff --git a/streamflow/cwl/step.py b/streamflow/cwl/step.py index c9166735f..afcea68cd 100644 --- a/streamflow/cwl/step.py +++ b/streamflow/cwl/step.py @@ -138,7 +138,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CWLConditionalStep: params = json.loads(row["params"]) step = cls( @@ -224,7 +224,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ): params = json.loads(row["params"]) return cls( diff --git a/streamflow/cwl/transformer.py b/streamflow/cwl/transformer.py index ac10ac292..5430fac16 100644 --- a/streamflow/cwl/transformer.py +++ b/streamflow/cwl/transformer.py @@ -46,7 +46,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ): params = json.loads(row["params"]) return cls( @@ -119,7 +119,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ): params = json.loads(row["params"]) return cls( @@ -251,7 +251,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ): params = json.loads(row["params"]) return cls( diff --git a/streamflow/workflow/combinator.py b/streamflow/workflow/combinator.py index 2d808bb35..bddb68f5e 100644 --- a/streamflow/workflow/combinator.py +++ b/streamflow/workflow/combinator.py @@ -26,7 +26,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CartesianProductCombinator: return cls( name=row["name"], @@ -220,7 +220,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> LoopTerminationCombinator: combinator = cls( name=row["name"], diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index 248748151..07c441ed9 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -54,7 +54,7 @@ async def _get_port( context: StreamFlowContext, port_id: int, loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ): if workflow: port_row = await context.database.get_port(port_id) @@ -171,7 +171,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> Combinator: return cls( name=row["name"], @@ -233,7 +233,7 @@ async def load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow 
= None, + workflow: Workflow | None = None, ) -> Combinator: type = cast(Combinator, utils.get_class_from_name(row["type"])) combinator = await type._load(context, row["params"], loading_context, workflow) @@ -302,7 +302,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> CombinatorStep: params = json.loads(row["params"]) return cls( @@ -473,7 +473,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> DeployStep: params = json.loads(row["params"]) return cls( @@ -589,7 +589,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> ExecuteStep: params = json.loads(row["params"]) step = cls( @@ -871,7 +871,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> GatherStep: params = json.loads(row["params"]) return cls( @@ -959,7 +959,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> InputInjectorStep: params = json.loads(row["params"]) return cls( @@ -1260,7 +1260,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> ScheduleStep: params = json.loads(row["params"]) if hardware_requirement := params.get("hardware_requirement"): @@ -1554,7 +1554,7 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow, + workflow: Workflow | None, ) -> TransferStep: params = json.loads(row["params"]) return cls( From 1029bb417174573894209457fb40aa5f6bc85e0b Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Fri, 5 Jan 2024 01:12:36 +0100 Subject: [PATCH 24/69] moved change_workflow control into load_workflow method --- streamflow/core/persistence.py | 7 +++- streamflow/core/workflow.py | 24 ++++++------ streamflow/cwl/combinator.py | 6 +-- streamflow/cwl/processor.py | 48 +++++++++++------------ streamflow/cwl/step.py | 12 +++--- streamflow/cwl/transformer.py | 18 ++++----- streamflow/persistence/loading_context.py | 13 ++++-- streamflow/workflow/combinator.py | 12 +++--- streamflow/workflow/step.py | 48 +++++++++++------------ 9 files changed, 100 insertions(+), 88 deletions(-) diff --git a/streamflow/core/persistence.py b/streamflow/core/persistence.py index 2fd0b9ff3..6f8c0e3ae 100644 --- a/streamflow/core/persistence.py +++ b/streamflow/core/persistence.py @@ -66,7 +66,12 @@ async def load_token(self, context: StreamFlowContext, persistent_id: int): ... @abstractmethod - async def load_workflow(self, context: StreamFlowContext, persistent_id: int): + async def load_workflow( + self, + context: StreamFlowContext, + persistent_id: int, + workflow: Workflow | None = None, + ): ... 
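With this extra parameter, every _load implementation can hand the loading context a target workflow: when one is given, the loader returns it directly instead of re-loading the workflow the row was originally saved with. A minimal usage sketch follows (it mirrors the test helper that appears later in this series; the step and new_workflow names are illustrative, and the step is assumed to be already saved so that step.persistent_id is set):

    # Hypothetical sketch: load a persisted step, but attach it to a freshly created workflow
    new_workflow = Workflow(context=context, type="cwl", name=utils.random_name(), config={})
    loading_context = DefaultDatabaseLoadingContext()
    new_step = await Step.load(context, step.persistent_id, loading_context, new_workflow)
    new_workflow.steps[new_step.name] = new_step
    await new_workflow.save(context)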
diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index a144aa793..8dc07ed7b 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -107,9 +107,9 @@ async def _load( ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] else None, @@ -268,9 +268,9 @@ async def _load( ) -> Port: return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), ) async def _save_additional_params( @@ -378,9 +378,9 @@ async def _load( ): return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), ) async def _save_additional_params( @@ -597,9 +597,9 @@ async def _load( ) -> TokenProcessor: return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), ) async def _save_additional_params(self, context: StreamFlowContext): diff --git a/streamflow/cwl/combinator.py b/streamflow/cwl/combinator.py index bdb1ecd2c..154e72082 100644 --- a/streamflow/cwl/combinator.py +++ b/streamflow/cwl/combinator.py @@ -45,9 +45,9 @@ async def _load( ) -> ListMergeCombinator: return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), input_names=row["input_names"], output_name=row["output_name"], flatten=row["flatten"], diff --git a/streamflow/cwl/processor.py b/streamflow/cwl/processor.py index b9ff5803b..24118f16e 100644 --- a/streamflow/cwl/processor.py +++ b/streamflow/cwl/processor.py @@ -154,9 +154,9 @@ async def _load( format_graph = Graph() return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), token_type=row["token_type"], check_type=row["check_type"], enum_symbols=row["enum_symbols"], @@ -397,9 +397,9 @@ async def _load( ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] else None, @@ -738,9 +738,9 @@ async def _load( ) -> CWLMapTokenProcessor: return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), processor=await TokenProcessor.load( context, row["processor"], loading_context, workflow ), @@ -803,9 +803,9 @@ async def _load( ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=workflow - if workflow - else await 
loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), processor=await CommandOutputProcessor.load( context, row["processor"], loading_context, workflow ), @@ -879,9 +879,9 @@ async def _load( ) -> CWLObjectTokenProcessor: return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), processors={ k: v for k, v in zip( @@ -985,9 +985,9 @@ async def _load( params = json.loads(row["params"]) return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] else None, @@ -1171,9 +1171,9 @@ async def _load( ) -> CWLUnionTokenProcessor: return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), processors=cast( MutableSequence[TokenProcessor], await asyncio.gather( @@ -1291,9 +1291,9 @@ async def _load( ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), processors=cast( MutableSequence[CommandOutputProcessor], await asyncio.gather( diff --git a/streamflow/cwl/step.py b/streamflow/cwl/step.py index afcea68cd..0571ba208 100644 --- a/streamflow/cwl/step.py +++ b/streamflow/cwl/step.py @@ -143,9 +143,9 @@ async def _load( params = json.loads(row["params"]) step = cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), expression=params["expression"], expression_lib=params["expression_lib"], full_js=params["full_js"], @@ -229,9 +229,9 @@ async def _load( params = json.loads(row["params"]) return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), scatter_method=params["scatter_method"], ) diff --git a/streamflow/cwl/transformer.py b/streamflow/cwl/transformer.py index 5430fac16..a67853648 100644 --- a/streamflow/cwl/transformer.py +++ b/streamflow/cwl/transformer.py @@ -51,9 +51,9 @@ async def _load( params = json.loads(row["params"]) return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), default_port=await loading_context.load_port( context, params["default_port"] ), @@ -124,9 +124,9 @@ async def _load( params = json.loads(row["params"]) return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), port_name=params["port_name"], processor=await TokenProcessor.load( context, params["processor"], 
loading_context, workflow @@ -256,9 +256,9 @@ async def _load( params = json.loads(row["params"]) return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), port_name=params["port_name"], processor=await TokenProcessor.load( context, params["processor"], loading_context, workflow diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 92800920a..f7095763b 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -68,7 +68,14 @@ async def load_token(self, context: StreamFlowContext, persistent_id: int): context, persistent_id, self ) - async def load_workflow(self, context: StreamFlowContext, persistent_id: int): - return self._workflows.get(persistent_id) or await Workflow.load( - context, persistent_id, self + async def load_workflow( + self, + context: StreamFlowContext, + persistent_id: int, + workflow: Workflow | None = None, + ): + return ( + workflow + or self._workflows.get(persistent_id) + or await Workflow.load(context, persistent_id, self) ) diff --git a/streamflow/workflow/combinator.py b/streamflow/workflow/combinator.py index bddb68f5e..4f5d7f545 100644 --- a/streamflow/workflow/combinator.py +++ b/streamflow/workflow/combinator.py @@ -30,9 +30,9 @@ async def _load( ) -> CartesianProductCombinator: return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), depth=row["depth"], ) @@ -224,9 +224,9 @@ async def _load( ) -> LoopTerminationCombinator: combinator = cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), ) for item in row["output_items"]: combinator.add_output_item(item) diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index 07c441ed9..acbaa3115 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -175,9 +175,9 @@ async def _load( ) -> Combinator: return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), ) async def _save_additional_params(self, context: StreamFlowContext): @@ -307,9 +307,9 @@ async def _load( params = json.loads(row["params"]) return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), combinator=await Combinator.load( context, params["combinator"], loading_context, workflow ), @@ -478,9 +478,9 @@ async def _load( params = json.loads(row["params"]) return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), deployment_config=await loading_context.load_deployment( context, params["deployment_config"] ), @@ -594,9 +594,9 @@ async def _load( params = json.loads(row["params"]) step = cls( name=row["name"], - workflow=workflow - if workflow - else 
await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), job_port=cast( JobPort, await _get_port(context, params["job_port"], loading_context, workflow), @@ -876,9 +876,9 @@ async def _load( params = json.loads(row["params"]) return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), depth=params["depth"], ) @@ -964,9 +964,9 @@ async def _load( params = json.loads(row["params"]) return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), job_port=cast( JobPort, await _get_port(context, params["job_port"], loading_context, workflow), @@ -1269,9 +1269,9 @@ async def _load( ) return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), binding_config=await BindingConfig.load( context, params["binding_config"], loading_context ), @@ -1559,9 +1559,9 @@ async def _load( params = json.loads(row["params"]) return cls( name=row["name"], - workflow=workflow - if workflow - else await loading_context.load_workflow(context, row["workflow"]), + workflow=await loading_context.load_workflow( + context, row["workflow"], workflow + ), job_port=cast( JobPort, await _get_port(context, params["job_port"], loading_context, workflow), From ed3a69c6ef51216e407be981e002700419f18db5 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Fri, 5 Jan 2024 01:22:09 +0100 Subject: [PATCH 25/69] fixed type error --- streamflow/persistence/loading_context.py | 1 + 1 file changed, 1 insertion(+) diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index f7095763b..d9fb74883 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -1,3 +1,4 @@ +from __future__ import annotations from typing import MutableMapping from streamflow.core.context import StreamFlowContext From 2a2d28657972a0cd9b557b223c7c06eeecf4fb3f Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Fri, 5 Jan 2024 01:22:21 +0100 Subject: [PATCH 26/69] removed add_step and add_port methods --- streamflow/core/workflow.py | 10 ++-------- tests/test_change_wf.py | 9 +++++---- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index 8dc07ed7b..a3012d0e2 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -658,21 +658,15 @@ def create_port(self, cls: type[P] = Port, name: str = None, **kwargs) -> P: if name is None: name = str(uuid.uuid4()) port = cls(workflow=self, name=name, **kwargs) - self.add_port(port) + self.ports[port.name] = port return port def create_step(self, cls: type[S], name: str = None, **kwargs) -> S: if name is None: name = str(uuid.uuid4()) step = cls(name=name, workflow=self, **kwargs) - self.add_step(step) - return step - - def add_port(self, port: Port): - self.ports[port.name] = port - - def add_step(self, step: Step): self.steps[step.name] = step + return step def get_output_port(self, name: str) -> Port: return self.ports[self.output_ports[name]] diff --git 
a/tests/test_change_wf.py b/tests/test_change_wf.py index b050319ab..1ae225f5f 100644 --- a/tests/test_change_wf.py +++ b/tests/test_change_wf.py @@ -77,7 +77,7 @@ async def _general_test_port(context: StreamFlowContext, cls_port: Type[Port]): new_port = await Port.load( context, port.persistent_id, loading_context, new_workflow ) - new_workflow.add_port(new_port) + new_workflow.ports[new_port.name] = new_port await new_workflow.save(context) _persistent_id_test(workflow, new_workflow, port, new_port) port.persistent_id = None @@ -119,13 +119,14 @@ async def _clone_step(step, workflow, context): new_step = await Step.load( context, step.persistent_id, loading_context, new_workflow ) - new_workflow.add_step(new_step) + new_workflow.steps[new_step.name] = new_step # ports are not loaded in new_workflow. It is necessary to do it manually for port in workflow.ports.values(): - new_workflow.add_port( - await Port.load(context, port.persistent_id, loading_context, new_workflow) + new_port = await Port.load( + context, port.persistent_id, loading_context, new_workflow ) + new_workflow.ports[new_port.name] = new_port await new_workflow.save(context) return new_workflow, new_step From 53407c85c2e4d68d6aa1f64bf5f9e6812d088f5f Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Fri, 5 Jan 2024 01:39:18 +0100 Subject: [PATCH 27/69] moved logic of the _get_port method into load_port method --- streamflow/core/persistence.py | 7 +++++- streamflow/core/utils.py | 9 +++++-- streamflow/cwl/step.py | 3 +-- streamflow/persistence/loading_context.py | 14 ++++++++++- streamflow/workflow/step.py | 30 ++++++----------------- 5 files changed, 34 insertions(+), 29 deletions(-) diff --git a/streamflow/core/persistence.py b/streamflow/core/persistence.py index 6f8c0e3ae..59f5251c8 100644 --- a/streamflow/core/persistence.py +++ b/streamflow/core/persistence.py @@ -50,7 +50,12 @@ async def load_filter(self, context: StreamFlowContext, persistent_id: int): ... @abstractmethod - async def load_port(self, context: StreamFlowContext, persistent_id: int): + async def load_port( + self, + context: StreamFlowContext, + persistent_id: int, + workflow: Workflow | None = None, + ): ... @abstractmethod diff --git a/streamflow/core/utils.py b/streamflow/core/utils.py index c1dacd159..a21debcf6 100644 --- a/streamflow/core/utils.py +++ b/streamflow/core/utils.py @@ -287,8 +287,13 @@ async def get_dependencies( loading_context: DatabaseLoadingContext, ): # This method is generally called from the step load method. 
- # If the workflow parameter in the load method has a value, the load_ports is False because - # it is not helpful to get the Port instance in loading_context + # If the steps and ports are loaded into their own workflow, it is helpful to call loading_context.load_port because + # - if the port instance is already in the loading_context, that instance is reused + # - otherwise a new port instance is created, + # which is helpful because that instance will be added to the workflow + # If the ports are loaded into a new workflow, it is not helpful to call loading_context.load_port because + # - if the instance is not in the loading_context, a new one is created, + # but it will not be added to the new workflow because it still references the old workflow if load_ports: ports = await asyncio.gather( *( diff --git a/streamflow/cwl/step.py index 0571ba208..e64e8f162 100644 --- a/streamflow/cwl/step.py +++ b/streamflow/cwl/step.py @@ -29,7 +29,6 @@ LoopOutputStep, TransferStep, _get_token_ids, - _get_port, ) from streamflow.workflow.token import IterationTerminationToken, ListToken, ObjectToken @@ -155,7 +154,7 @@ async def _load( await asyncio.gather( *( asyncio.create_task( - _get_port(context, value, loading_context, workflow) + loading_context.load_port(context, value, workflow) ) for value in params["skip_ports"].values() ) diff --git a/streamflow/persistence/loading_context.py index d9fb74883..78cef6170 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -49,7 +49,19 @@ async def load_filter(self, context: StreamFlowContext, persistent_id: int): context, persistent_id, self ) - async def load_port(self, context: StreamFlowContext, persistent_id: int): + async def load_port( + self, + context: StreamFlowContext, + persistent_id: int, + workflow: Workflow | None = None, + ): + if workflow: + port_row = await context.database.get_port(persistent_id) + if port := workflow.ports.get(port_row["name"]): + return port + + # If the port is not available in the new workflow, a new one must be created + return await Port.load(context, persistent_id, self, workflow) return self._ports.get(persistent_id) or await Port.load( context, persistent_id, self ) diff --git a/streamflow/workflow/step.py index acbaa3115..f06f44532 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -50,22 +50,6 @@ ) -async def _get_port( - context: StreamFlowContext, - port_id: int, - loading_context: DatabaseLoadingContext, - workflow: Workflow | None, -): - if workflow: - port_row = await context.database.get_port(port_id) - if port := workflow.ports.get(port_row["name"]): - return port - - # If the port is not available in the new workflow, a new one must be created - return await Port.load(context, port_id, loading_context, workflow) - return await loading_context.load_port(context, port_id) - - def _get_directory(path_processor: ModuleType, directory: str | None, target: Target): return directory or path_processor.join(target.workdir, utils.random_name()) @@ -486,8 +470,8 @@ async def _load( ), connector_port=cast( ConnectorPort, - await _get_port( - context, params["connector_port"], loading_context, workflow + await loading_context.load_port( + context, params["connector_port"], workflow ), ), ) @@ -599,7 +583,7 @@ async def _load( ), job_port=cast( JobPort, -
await loading_context.load_port(context, params["job_port"], workflow), ), ) step.output_connectors = params["output_connectors"] @@ -969,7 +953,7 @@ async def _load( ), job_port=cast( JobPort, - await _get_port(context, params["job_port"], loading_context, workflow), + await loading_context.load_port(context, params["job_port"], workflow), ), ) @@ -1278,13 +1262,13 @@ async def _load( connector_ports={ k: cast( ConnectorPort, - await _get_port(context, v, loading_context, workflow), + await loading_context.load_port(context, v, workflow), ) for k, v in params["connector_ports"].items() }, job_port=cast( JobPort, - await _get_port(context, params["job_port"], loading_context, workflow), + await loading_context.load_port(context, params["job_port"], workflow), ), job_prefix=params["job_prefix"], hardware_requirement=hardware_requirement, @@ -1564,7 +1548,7 @@ async def _load( ), job_port=cast( JobPort, - await _get_port(context, params["job_port"], loading_context, workflow), + await loading_context.load_port(context, params["job_port"], workflow), ), ) From b60d21eef51bb07fdcc53b1978db3c7f939d2f31 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Fri, 5 Jan 2024 01:45:32 +0100 Subject: [PATCH 28/69] fix --- streamflow/core/workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index a3012d0e2..931040eea 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -658,14 +658,14 @@ def create_port(self, cls: type[P] = Port, name: str = None, **kwargs) -> P: if name is None: name = str(uuid.uuid4()) port = cls(workflow=self, name=name, **kwargs) - self.ports[port.name] = port + self.ports[name] = port return port def create_step(self, cls: type[S], name: str = None, **kwargs) -> S: if name is None: name = str(uuid.uuid4()) step = cls(name=name, workflow=self, **kwargs) - self.steps[step.name] = step + self.steps[name] = step return step def get_output_port(self, name: str) -> Port: From 0f6d912c4b98ac579259a7c17b8a263c4b5741f2 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Fri, 5 Jan 2024 02:04:29 +0100 Subject: [PATCH 29/69] added tests --- tests/test_cwl_change_wf.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/tests/test_cwl_change_wf.py b/tests/test_cwl_change_wf.py index f79309a6b..74f24247c 100644 --- a/tests/test_cwl_change_wf.py +++ b/tests/test_cwl_change_wf.py @@ -8,22 +8,24 @@ from streamflow.cwl.processor import CWLTokenProcessor from streamflow.cwl.step import ( CWLConditionalStep, - CWLTransferStep, - CWLInputInjectorStep, CWLEmptyScatterConditionalStep, + CWLInputInjectorStep, + CWLLoopConditionalStep, CWLLoopOutputAllStep, + CWLLoopOutputLastStep, + CWLTransferStep, ) from streamflow.cwl.transformer import ( - DefaultTransformer, - DefaultRetagTransformer, - CWLTokenTransformer, - LoopValueFromTransformer, - ValueFromTransformer, AllNonNullTransformer, + CWLTokenTransformer, + DefaultRetagTransformer, + DefaultTransformer, FirstNonNullTransformer, ForwardTransformer, ListToElementTransformer, + LoopValueFromTransformer, OnlyNonNullTransformer, + ValueFromTransformer, ) from streamflow.workflow.step import CombinatorStep from tests.conftest import ( @@ -240,6 +242,22 @@ async def test_cwl_input_injector_step(context: StreamFlowContext): ) +@pytest.mark.asyncio +async def test_cwl_loop_conditional_step(context: StreamFlowContext): + """Test saving CWLLoopConditionalStep on database and re-load it in a new 
Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + await _base_step_test_process( + workflow, + CWLLoopConditionalStep, + { + "name": utils.random_name() + "-when", + "expression": f"$(inputs.{utils.random_name()}.length == 1)", + "full_js": True, + }, + context, + ) + + @pytest.mark.asyncio async def test_empty_scatter_conditional_step(context: StreamFlowContext): """Test saving CWLEmptyScatterConditionalStep on database and re-load it in a new Workflow""" @@ -319,12 +337,13 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): @pytest.mark.asyncio -async def test_cwl_loop_output_all_step(context: StreamFlowContext): +@pytest.mark.parametrize("step_cls", [CWLLoopOutputAllStep, CWLLoopOutputLastStep]) +async def test_cwl_loop_output(context: StreamFlowContext, step_cls): """Test saving CWLLoopOutputAllStep on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] await _base_step_test_process( workflow, - CWLLoopOutputAllStep, + step_cls, { "name": utils.random_name() + "-loop-output", }, From 52dd91378c54fa802d183fe0ae66bcb8e1ef83f8 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Fri, 5 Jan 2024 02:12:58 +0100 Subject: [PATCH 30/69] used pytest parametrize --- tests/test_cwl_change_wf.py | 38 +++++++------------------------------ 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/tests/test_cwl_change_wf.py b/tests/test_cwl_change_wf.py index 74f24247c..e9da32363 100644 --- a/tests/test_cwl_change_wf.py +++ b/tests/test_cwl_change_wf.py @@ -97,26 +97,16 @@ async def test_value_from_transformer(context: StreamFlowContext): @pytest.mark.asyncio -async def test_all_non_null_transformer(context: StreamFlowContext): - """Test saving AllNonNullTransformer on database and re-load it in a new Workflow""" - workflow = (await create_workflow(context, num_port=1))[0] - await _base_step_test_process( - workflow, - AllNonNullTransformer, - { - "name": utils.random_name() + "-transformer", - }, - context, - ) - - -@pytest.mark.asyncio -async def test_first_non_null_transformer(context: StreamFlowContext): - """Test saving FirstNonNullTransformer on database and re-load it in a new Workflow""" +@pytest.mark.parametrize( + "transformer_cls", + [AllNonNullTransformer, FirstNonNullTransformer, OnlyNonNullTransformer], +) +async def test_non_null_transformer(context: StreamFlowContext, transformer_cls): + """Test saving All/First/Only NonNullTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] await _base_step_test_process( workflow, - FirstNonNullTransformer, + transformer_cls, { "name": utils.random_name() + "-transformer", }, @@ -152,20 +142,6 @@ async def test_list_to_element_transformer(context: StreamFlowContext): ) -@pytest.mark.asyncio -async def test_only_non_null_transformer(context: StreamFlowContext): - """Test saving OnlyNonNullTransformer on database and re-load it in a new Workflow""" - workflow = (await create_workflow(context, num_port=1))[0] - await _base_step_test_process( - workflow, - OnlyNonNullTransformer, - { - "name": utils.random_name() + "-transformer", - }, - context, - ) - - @pytest.mark.asyncio async def test_cwl_token_transformer(context: StreamFlowContext): """Test saving CWLTokenTransformer on database and re-load it in a new Workflow""" From 22084219a253c434104362bbbeb879d93106334b Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Fri, 5 Jan 2024 23:31:29 +0100 Subject: [PATCH 31/69] made code 
more readable --- streamflow/persistence/loading_context.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/streamflow/persistence/loading_context.py index 78cef6170..547263025 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -59,12 +59,13 @@ async def load_port( port_row = await context.database.get_port(persistent_id) if port := workflow.ports.get(port_row["name"]): return port - - # If the port is not available in the new workflow, a new one must be created - return await Port.load(context, persistent_id, self, workflow) - return self._ports.get(persistent_id) or await Port.load( - context, persistent_id, self - ) + else: + # If the port is not available in the new workflow, a new one must be created + return await Port.load(context, persistent_id, self, workflow) + else: + return self._ports.get(persistent_id) or await Port.load( + context, persistent_id, self + ) async def load_step(self, context: StreamFlowContext, persistent_id: int): return self._steps.get(persistent_id) or await Step.load( From a61d2ccd9c15429723b8a284e0d9e3b773e70690 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Fri, 5 Jan 2024 23:31:54 +0100 Subject: [PATCH 32/69] renamed the get_dependencies method to load_dependencies --- streamflow/core/utils.py | 2 +- streamflow/core/workflow.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/streamflow/core/utils.py index a21debcf6..e79ea9e70 100644 --- a/streamflow/core/utils.py +++ b/streamflow/core/utils.py @@ -280,7 +280,7 @@ def wrap_command(command: str): return ["/bin/sh", "-c", f"{command}"] -async def get_dependencies( +async def load_dependencies( dependency_rows: MutableSequence[MutableMapping[str, Any]], load_ports: bool, context: StreamFlowContext, diff --git a/streamflow/core/workflow.py index 931040eea..3693e4930 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -15,7 +15,7 @@ DependencyType, PersistableEntity, ) -from streamflow.core.utils import get_dependencies +from streamflow.core.utils import load_dependencies if TYPE_CHECKING: from streamflow.core.deployment import Connector, Location, Target @@ -455,11 +455,11 @@ async def load( Status.SKIPPED, ] input_deps = await context.database.get_input_ports(persistent_id) - step.input_ports = await get_dependencies( + step.input_ports = await load_dependencies( input_deps, workflow is None, context, loading_context ) output_deps = await context.database.get_output_ports(persistent_id) - step.output_ports = await get_dependencies( + step.output_ports = await load_dependencies( output_deps, workflow is None, context, loading_context ) if not workflow: From e3b03e7cf522aca471d38d1b6732b8d5ea0fb637 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 01:16:39 +0100 Subject: [PATCH 33/69] created a new WorkflowLoader class --- streamflow/core/persistence.py | 6 +- streamflow/core/workflow.py | 49 +++++------- streamflow/cwl/combinator.py | 5 +- streamflow/cwl/processor.py | 58 ++++---------- streamflow/cwl/step.py | 14 +--- streamflow/cwl/transformer.py | 19 ++--- streamflow/persistence/loading_context.py | 94 ++++++++++++++++++---- streamflow/workflow/combinator.py | 10 +-- streamflow/workflow/step.py | 65 +++++----------- tests/test_change_wf.py | 95 ++++++++++++----------- 10 files changed, 192 insertions(+), 223 deletions(-) diff --git
a/streamflow/core/persistence.py b/streamflow/core/persistence.py index 59f5251c8..780296628 100644 --- a/streamflow/core/persistence.py +++ b/streamflow/core/persistence.py @@ -13,6 +13,10 @@ class DatabaseLoadingContext(ABC): + @abstractmethod + def is_standard_loading(self) -> bool: + ... + @abstractmethod def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): ... @@ -54,7 +58,6 @@ async def load_port( self, context: StreamFlowContext, persistent_id: int, - workflow: Workflow | None = None, ): ... @@ -75,7 +78,6 @@ async def load_workflow( self, context: StreamFlowContext, persistent_id: int, - workflow: Workflow | None = None, ): ... diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index 3693e4930..4ef1be039 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -103,13 +103,10 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] else None, @@ -130,12 +127,11 @@ async def load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None = None, ) -> CommandOutputProcessor: type = cast( Type[CommandOutputProcessor], utils.get_class_from_name(row["type"]) ) - return await type._load(context, row["params"], loading_context, workflow) + return await type._load(context, row["params"], loading_context) @abstractmethod async def process( @@ -264,13 +260,10 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> Port: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), ) async def _save_additional_params( @@ -314,12 +307,11 @@ async def load( context: StreamFlowContext, persistent_id: int, loading_context: DatabaseLoadingContext, - workflow: Workflow | None = None, ) -> Port: row = await context.database.get_port(persistent_id) type = cast(Type[Port], utils.get_class_from_name(row["type"])) - port = await type._load(context, row, loading_context, workflow) - if not workflow: + port = await type._load(context, row, loading_context) + if loading_context.is_standard_loading(): port.persistent_id = persistent_id loading_context.add_port(persistent_id, port) return port @@ -374,13 +366,10 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ): return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), ) async def _save_additional_params( @@ -441,12 +430,11 @@ async def load( context: StreamFlowContext, persistent_id: int, loading_context: DatabaseLoadingContext, - workflow: Workflow | None = None, ) -> Step: row = await context.database.get_step(persistent_id) type = cast(Type[Step], utils.get_class_from_name(row["type"])) - step = await type._load(context, row, loading_context, workflow) - if not workflow: 
+ step = await type._load(context, row, loading_context) + if loading_context.is_standard_loading(): step.persistent_id = persistent_id step.status = Status(row["status"]) step.terminated = step.status in [ @@ -456,14 +444,19 @@ async def load( ] input_deps = await context.database.get_input_ports(persistent_id) step.input_ports = await load_dependencies( - input_deps, workflow is None, context, loading_context + input_deps, + loading_context.is_standard_loading(), + context, + loading_context, ) output_deps = await context.database.get_output_ports(persistent_id) step.output_ports = await load_dependencies( - output_deps, workflow is None, context, loading_context + output_deps, + loading_context.is_standard_loading(), + context, + loading_context, ) - if not workflow: - loading_context.add_step(persistent_id, step) + loading_context.add_step(persistent_id, step) return step @abstractmethod @@ -593,13 +586,10 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> TokenProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), ) async def _save_additional_params(self, context: StreamFlowContext): @@ -611,10 +601,9 @@ async def load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None = None, ): type = cast(Type[TokenProcessor], utils.get_class_from_name(row["type"])) - return await type._load(context, row["params"], loading_context, workflow) + return await type._load(context, row["params"], loading_context) @abstractmethod async def process(self, inputs: MutableMapping[str, Token], token: Token) -> Token: diff --git a/streamflow/cwl/combinator.py b/streamflow/cwl/combinator.py index 154e72082..1a7cdaf7c 100644 --- a/streamflow/cwl/combinator.py +++ b/streamflow/cwl/combinator.py @@ -41,13 +41,10 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> ListMergeCombinator: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), input_names=row["input_names"], output_name=row["output_name"], flatten=row["flatten"], diff --git a/streamflow/cwl/processor.py b/streamflow/cwl/processor.py index 24118f16e..91efea2cc 100644 --- a/streamflow/cwl/processor.py +++ b/streamflow/cwl/processor.py @@ -149,14 +149,11 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> CWLTokenProcessor: format_graph = Graph() return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), token_type=row["token_type"], check_type=row["check_type"], enum_symbols=row["enum_symbols"], @@ -393,13 +390,10 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, 
row["workflow"]), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] else None, @@ -734,15 +728,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> CWLMapTokenProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), processor=await TokenProcessor.load( - context, row["processor"], loading_context, workflow + context, row["processor"], loading_context ), optional=row["optional"], ) @@ -799,15 +790,12 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> CommandOutputProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), processor=await CommandOutputProcessor.load( - context, row["processor"], loading_context, workflow + context, row["processor"], loading_context ), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] @@ -875,13 +863,10 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> CWLObjectTokenProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), processors={ k: v for k, v in zip( @@ -889,9 +874,7 @@ async def _load( await asyncio.gather( *( asyncio.create_task( - TokenProcessor.load( - context, v, loading_context, workflow - ) + TokenProcessor.load(context, v, loading_context) ) for v in row["processors"].values() ) @@ -980,14 +963,11 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> CommandOutputProcessor: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), target=(await loading_context.load_target(context, row["workflow"])) if row["target"] else None, @@ -998,9 +978,7 @@ async def _load( await asyncio.gather( *( asyncio.create_task( - CommandOutputProcessor.load( - context, v, loading_context, workflow - ) + CommandOutputProcessor.load(context, v, loading_context) ) for v in row["processors"].values() ) @@ -1167,19 +1145,16 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> CWLUnionTokenProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), processors=cast( MutableSequence[TokenProcessor], await asyncio.gather( *( asyncio.create_task( - TokenProcessor.load(context, p, loading_context, workflow) + TokenProcessor.load(context, p, loading_context) ) for p in row["processors"] ) @@ -1287,21 +1262,16 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> 
CommandOutputProcessor: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), processors=cast( MutableSequence[CommandOutputProcessor], await asyncio.gather( *( asyncio.create_task( - CommandOutputProcessor.load( - context, p, loading_context, workflow - ) + CommandOutputProcessor.load(context, p, loading_context) ) for p in row["processors"] ) diff --git a/streamflow/cwl/step.py b/streamflow/cwl/step.py index e64e8f162..edaf36283 100644 --- a/streamflow/cwl/step.py +++ b/streamflow/cwl/step.py @@ -137,14 +137,11 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> CWLConditionalStep: params = json.loads(row["params"]) step = cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), expression=params["expression"], expression_lib=params["expression_lib"], full_js=params["full_js"], @@ -153,9 +150,7 @@ async def _load( params["skip_ports"].keys(), await asyncio.gather( *( - asyncio.create_task( - loading_context.load_port(context, value, workflow) - ) + asyncio.create_task(loading_context.load_port(context, value)) for value in params["skip_ports"].values() ) ), @@ -223,14 +218,11 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), scatter_method=params["scatter_method"], ) diff --git a/streamflow/cwl/transformer.py b/streamflow/cwl/transformer.py index a67853648..22119495a 100644 --- a/streamflow/cwl/transformer.py +++ b/streamflow/cwl/transformer.py @@ -46,14 +46,11 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), default_port=await loading_context.load_port( context, params["default_port"] ), @@ -119,17 +116,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), port_name=params["port_name"], processor=await TokenProcessor.load( - context, params["processor"], loading_context, workflow + context, params["processor"], loading_context ), ) @@ -251,17 +245,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ): params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), 
port_name=params["port_name"], processor=await TokenProcessor.load( - context, params["processor"], loading_context, workflow + context, params["processor"], loading_context ), value_from=params["value_from"], expression_lib=params["expression_lib"], diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 547263025..e31401181 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -53,19 +53,10 @@ async def load_port( self, context: StreamFlowContext, persistent_id: int, - workflow: Workflow | None = None, ): - if workflow: - port_row = await context.database.get_port(persistent_id) - if port := workflow.ports.get(port_row["name"]): - return port - else: - # If the port is not available in the new workflow, a new one must be created - return await Port.load(context, persistent_id, self, workflow) - else: - return self._ports.get(persistent_id) or await Port.load( - context, persistent_id, self - ) + return self._ports.get(persistent_id) or await Port.load( + context, persistent_id, self + ) async def load_step(self, context: StreamFlowContext, persistent_id: int): return self._steps.get(persistent_id) or await Step.load( @@ -86,10 +77,79 @@ async def load_workflow( self, context: StreamFlowContext, persistent_id: int, - workflow: Workflow | None = None, ): - return ( - workflow - or self._workflows.get(persistent_id) - or await Workflow.load(context, persistent_id, self) + return self._workflows.get(persistent_id) or await Workflow.load( + context, persistent_id, self ) + + def is_standard_loading(self) -> bool: + return True + + +class WorkflowLoader(DatabaseLoadingContext): + def __init__(self, workflow: Workflow): + super().__init__() + self.workflow: Workflow = workflow + self._tokens: MutableMapping[int, Token] = {} + + def is_standard_loading(self) -> bool: + return False + + def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): + ... + + def add_filter(self, persistent_id: int, filter_config: FilterConfig): + ... + + def add_port(self, persistent_id: int, port: Port): + ... + + def add_step(self, persistent_id: int, step: Step): + ... + + def add_target(self, persistent_id: int, target: Target): + ... + + def add_token(self, persistent_id: int, token: Token): + self._tokens[persistent_id] = token + + def add_workflow(self, persistent_id: int, workflow: Workflow): + ... 
+ + async def load_deployment(self, context: StreamFlowContext, persistent_id: int): + return await DeploymentConfig.load(context, persistent_id, self) + + async def load_filter(self, context: StreamFlowContext, persistent_id: int): + return await FilterConfig.load(context, persistent_id, self) + + async def load_port( + self, + context: StreamFlowContext, + persistent_id: int, + ): + port_row = await context.database.get_port(persistent_id) + if port := self.workflow.ports.get(port_row["name"]): + return port + else: + # If the port is not available in the new workflow, a new one must be created + port = await Port.load(context, persistent_id, self) + self.workflow.ports[port.name] = port + return port + + async def load_step(self, context: StreamFlowContext, persistent_id: int): + return await Step.load(context, persistent_id, self) + + async def load_target(self, context: StreamFlowContext, persistent_id: int): + return await Target.load(context, persistent_id, self) + + async def load_token(self, context: StreamFlowContext, persistent_id: int): + return self._tokens.get(persistent_id) or await Token.load( + context, persistent_id, self + ) + + async def load_workflow( + self, + context: StreamFlowContext, + persistent_id: int, + ): + return self.workflow diff --git a/streamflow/workflow/combinator.py b/streamflow/workflow/combinator.py index 4f5d7f545..4165d94de 100644 --- a/streamflow/workflow/combinator.py +++ b/streamflow/workflow/combinator.py @@ -26,13 +26,10 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> CartesianProductCombinator: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), depth=row["depth"], ) @@ -220,13 +217,10 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> LoopTerminationCombinator: combinator = cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), ) for item in row["output_items"]: combinator.add_output_item(item) diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index f06f44532..33756ad26 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -155,13 +155,10 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> Combinator: return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), ) async def _save_additional_params(self, context: StreamFlowContext): @@ -217,16 +214,15 @@ async def load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None = None, ) -> Combinator: type = cast(Combinator, utils.get_class_from_name(row["type"])) - combinator = await type._load(context, row["params"], loading_context, workflow) + combinator = await type._load(context, row["params"], loading_context) combinator.items = row["params"]["items"] combinator.combinators_map = row["params"]["combinators_map"] combinator.combinators = {} for k, c in 
row["params"]["combinators"].items(): combinator.combinators[k] = await Combinator.load( - context, c, loading_context, workflow + context, c, loading_context ) return combinator @@ -286,16 +282,13 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> CombinatorStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), combinator=await Combinator.load( - context, params["combinator"], loading_context, workflow + context, params["combinator"], loading_context ), ) @@ -457,22 +450,17 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> DeployStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), deployment_config=await loading_context.load_deployment( context, params["deployment_config"] ), connector_port=cast( ConnectorPort, - await loading_context.load_port( - context, params["connector_port"], workflow - ), + await loading_context.load_port(context, params["connector_port"]), ), ) @@ -573,17 +561,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> ExecuteStep: params = json.loads(row["params"]) step = cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), job_port=cast( JobPort, - await loading_context.load_port(context, params["job_port"], workflow), + await loading_context.load_port(context, params["job_port"]), ), ) step.output_connectors = params["output_connectors"] @@ -594,9 +579,7 @@ async def _load( await asyncio.gather( *( asyncio.create_task( - CommandOutputProcessor.load( - context, p, loading_context, workflow - ) + CommandOutputProcessor.load(context, p, loading_context) ) for p in params["output_processors"].values() ) @@ -855,14 +838,11 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> GatherStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), depth=params["depth"], ) @@ -943,17 +923,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> InputInjectorStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), job_port=cast( JobPort, - await loading_context.load_port(context, params["job_port"], workflow), + await loading_context.load_port(context, params["job_port"]), ), ) @@ -1244,7 +1221,6 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> 
ScheduleStep: params = json.loads(row["params"]) if hardware_requirement := params.get("hardware_requirement"): @@ -1253,22 +1229,20 @@ async def _load( ) return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), binding_config=await BindingConfig.load( context, params["binding_config"], loading_context ), connector_ports={ k: cast( ConnectorPort, - await loading_context.load_port(context, v, workflow), + await loading_context.load_port(context, v), ) for k, v in params["connector_ports"].items() }, job_port=cast( JobPort, - await loading_context.load_port(context, params["job_port"], workflow), + await loading_context.load_port(context, params["job_port"]), ), job_prefix=params["job_prefix"], hardware_requirement=hardware_requirement, @@ -1538,17 +1512,14 @@ async def _load( context: StreamFlowContext, row: MutableMapping[str, Any], loading_context: DatabaseLoadingContext, - workflow: Workflow | None, ) -> TransferStep: params = json.loads(row["params"]) return cls( name=row["name"], - workflow=await loading_context.load_workflow( - context, row["workflow"], workflow - ), + workflow=await loading_context.load_workflow(context, row["workflow"]), job_port=cast( JobPort, - await loading_context.load_port(context, params["job_port"], workflow), + await loading_context.load_port(context, params["job_port"]), ), ) diff --git a/tests/test_change_wf.py b/tests/test_change_wf.py index 1ae225f5f..dcc29bc85 100644 --- a/tests/test_change_wf.py +++ b/tests/test_change_wf.py @@ -9,7 +9,7 @@ from streamflow.core.workflow import Workflow, Port, Step from streamflow.cwl.command import CWLCommand, CWLCommandToken from streamflow.cwl.translator import _create_command_output_processor_base -from streamflow.persistence.loading_context import DefaultDatabaseLoadingContext +from streamflow.persistence.loading_context import WorkflowLoader from streamflow.workflow.combinator import LoopCombinator from streamflow.workflow.port import ConnectorPort, JobPort from streamflow.workflow.step import ( @@ -43,22 +43,37 @@ async def _base_step_test_process( new_workflow, new_step = await _clone_step(step, workflow, context) _persistent_id_test(workflow, new_workflow, step, new_step) if test_are_eq: + for p1, p2 in zip(workflow.ports.values(), new_workflow.ports.values()): + assert p1.persistent_id != p2.persistent_id + assert p1.workflow.name != p2.workflow.name + for p in workflow.ports.values(): + p.persistent_id = None + p.workflow = None + for p in new_workflow.ports.values(): + p.persistent_id = None + p.workflow = None _set_val_to_attributes(step, ["persistent_id", "workflow"], None) _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) assert are_equals(step, new_step) - return step, new_workflow, new_step + return None, None, None + else: + return step, new_workflow, new_step -def _persistent_id_test(original_workflow, new_workflow, original_elem, new_elem): - assert original_workflow.persistent_id - assert new_workflow.persistent_id - assert original_workflow.persistent_id != new_workflow.persistent_id - if isinstance(original_elem, Step): - assert new_elem.name in new_workflow.steps.keys() - if isinstance(original_elem, Port): - assert new_elem.name in new_workflow.ports.keys() - assert original_elem.persistent_id != new_elem.persistent_id - assert new_elem.workflow.persistent_id == new_workflow.persistent_id +async def _clone_step(step, workflow, context): + 
new_workflow = Workflow( + context=context, type="cwl", name=utils.random_name(), config={} + ) + loading_context = WorkflowLoader(workflow=new_workflow) + new_step = await loading_context.load_step(context, step.persistent_id) + new_workflow.steps[new_step.name] = new_step + + # ports are not loaded in new_workflow. It is necessary to do it manually + for port in workflow.ports.values(): + new_port = await loading_context.load_port(context, port.persistent_id) + new_workflow.ports[new_port.name] = new_port + await new_workflow.save(context) + return new_workflow, new_step async def _general_test_port(context: StreamFlowContext, cls_port: Type[Port]): @@ -70,23 +85,31 @@ async def _general_test_port(context: StreamFlowContext, cls_port: Type[Port]): assert workflow.persistent_id assert port.persistent_id - loading_context = DefaultDatabaseLoadingContext() new_workflow = Workflow( context=context, type="cwl", name=utils.random_name(), config={} ) - new_port = await Port.load( - context, port.persistent_id, loading_context, new_workflow - ) + loading_context = WorkflowLoader(workflow=new_workflow) + new_port = await loading_context.load_port(context, port.persistent_id) new_workflow.ports[new_port.name] = new_port await new_workflow.save(context) _persistent_id_test(workflow, new_workflow, port, new_port) - port.persistent_id = None - new_port.persistent_id = None - port.workflow = None - new_port.workflow = None + _set_val_to_attributes(port, ["persistent_id", "workflow"], None) + _set_val_to_attributes(new_port, ["persistent_id", "workflow"], None) assert are_equals(port, new_port) +def _persistent_id_test(original_workflow, new_workflow, original_elem, new_elem): + assert original_workflow.persistent_id + assert new_workflow.persistent_id + assert original_workflow.persistent_id != new_workflow.persistent_id + if isinstance(original_elem, Step): + assert new_elem.name in new_workflow.steps.keys() + if isinstance(original_elem, Port): + assert new_elem.name in new_workflow.ports.keys() + assert original_elem.persistent_id != new_elem.persistent_id + assert new_elem.workflow.persistent_id == new_workflow.persistent_id + + def _set_val_to_attributes(elem, str_attributes: MutableSequence[str], val): attrs = object_to_dict(elem) for attr in str_attributes: @@ -94,6 +117,12 @@ def _set_val_to_attributes(elem, str_attributes: MutableSequence[str], val): setattr(elem, attr, val) +def _set_workflow_in_combinator(combinator, workflow): + combinator.workflow = workflow + for c in combinator.combinators.values(): + _set_workflow_in_combinator(c, workflow) + + def _workflow_in_combinator_test(original_combinator, new_combinator): assert ( original_combinator.workflow.persistent_id @@ -105,32 +134,6 @@ def _workflow_in_combinator_test(original_combinator, new_combinator): _workflow_in_combinator_test(original_inner, new_inner) -def _set_workflow_in_combinator(combinator, workflow): - combinator.workflow = workflow - for c in combinator.combinators.values(): - _set_workflow_in_combinator(c, workflow) - - -async def _clone_step(step, workflow, context): - new_workflow = Workflow( - context=context, type="cwl", name=utils.random_name(), config={} - ) - loading_context = DefaultDatabaseLoadingContext() - new_step = await Step.load( - context, step.persistent_id, loading_context, new_workflow - ) - new_workflow.steps[new_step.name] = new_step - - # ports are not loaded in new_workflow. 
It is necessary to do it manually - for port in workflow.ports.values(): - new_port = await Port.load( - context, port.persistent_id, loading_context, new_workflow - ) - new_workflow.ports[new_port.name] = new_port - await new_workflow.save(context) - return new_workflow, new_step - - @pytest.mark.asyncio async def test_port(context: StreamFlowContext): """Test saving Port on database and re-load it in a new Workflow""" From f6e0d3fc76db10958d97bd10cd077872d203d618 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 01:25:54 +0100 Subject: [PATCH 34/69] made code more readable --- tests/test_change_wf.py | 49 +++++++++++++++++-------------------- tests/test_cwl_change_wf.py | 36 +++++++++++++-------------- 2 files changed, 40 insertions(+), 45 deletions(-) diff --git a/tests/test_change_wf.py b/tests/test_change_wf.py index dcc29bc85..465fc07a9 100644 --- a/tests/test_change_wf.py +++ b/tests/test_change_wf.py @@ -1,4 +1,4 @@ -from typing import Type, cast, MutableSequence +from typing import Type, cast import pytest @@ -20,10 +20,7 @@ GatherStep, ScatterStep, ) -from tests.conftest import ( - are_equals, - object_to_dict, -) +from tests.conftest import are_equals from tests.utils.workflow import ( create_workflow, create_schedule_step, @@ -52,8 +49,8 @@ async def _base_step_test_process( for p in new_workflow.ports.values(): p.persistent_id = None p.workflow = None - _set_val_to_attributes(step, ["persistent_id", "workflow"], None) - _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) assert are_equals(step, new_step) return None, None, None else: @@ -93,8 +90,8 @@ async def _general_test_port(context: StreamFlowContext, cls_port: Type[Port]): new_workflow.ports[new_port.name] = new_port await new_workflow.save(context) _persistent_id_test(workflow, new_workflow, port, new_port) - _set_val_to_attributes(port, ["persistent_id", "workflow"], None) - _set_val_to_attributes(new_port, ["persistent_id", "workflow"], None) + _set_to_none(port, id_to_none=True, wf_to_none=True) + _set_to_none(new_port, id_to_none=True, wf_to_none=True) assert are_equals(port, new_port) @@ -110,11 +107,11 @@ def _persistent_id_test(original_workflow, new_workflow, original_elem, new_elem assert new_elem.workflow.persistent_id == new_workflow.persistent_id -def _set_val_to_attributes(elem, str_attributes: MutableSequence[str], val): - attrs = object_to_dict(elem) - for attr in str_attributes: - if attr in attrs.keys(): - setattr(elem, attr, val) +def _set_to_none(elem, id_to_none=False, wf_to_none=False): + if id_to_none: + elem.persistent_id = None + if wf_to_none: + elem.workflow = None def _set_workflow_in_combinator(combinator, workflow): @@ -194,10 +191,10 @@ async def test_execute_step(context: StreamFlowContext): original_processor.workflow.persistent_id != new_processor.workflow.persistent_id ) - _set_val_to_attributes(original_processor, ["workflow"], None) - _set_val_to_attributes(new_processor, ["workflow"], None) - _set_val_to_attributes(step, ["persistent_id", "workflow"], None) - _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_to_none(original_processor, wf_to_none=True) + _set_to_none(new_processor, wf_to_none=True) + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) assert are_equals(step, new_step) @@ -227,10 +224,10 @@ async def test_schedule_step(context: 
StreamFlowContext): ): # Config are read-only so workflows can share the same assert original_filter.persistent_id == new_filter.persistent_id - _set_val_to_attributes(original_filter, ["persistent_id", "workflow"], None) - _set_val_to_attributes(new_filter, ["persistent_id", "workflow"], None) - _set_val_to_attributes(step, ["persistent_id", "workflow"], None) - _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_to_none(original_filter, id_to_none=True, wf_to_none=True) + _set_to_none(new_filter, id_to_none=True, wf_to_none=True) + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) assert are_equals(step, new_step) @@ -273,8 +270,8 @@ async def test_combinator_step(context: StreamFlowContext, combinator: Combinato new_workflow, new_step = await _clone_step(step, workflow, context) _persistent_id_test(workflow, new_workflow, step, new_step) - _set_val_to_attributes(step, ["persistent_id", "workflow"], None) - _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) _workflow_in_combinator_test(step.combinator, new_step.combinator) _set_workflow_in_combinator(step.combinator, None) _set_workflow_in_combinator(new_step.combinator, None) @@ -305,8 +302,8 @@ async def test_loop_combinator_step(context: StreamFlowContext): new_workflow, new_step = await _clone_step(step, workflow, context) _persistent_id_test(workflow, new_workflow, step, new_step) - _set_val_to_attributes(step, ["persistent_id", "workflow"], None) - _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) _workflow_in_combinator_test(step.combinator, new_step.combinator) _set_workflow_in_combinator(step.combinator, None) _set_workflow_in_combinator(new_step.combinator, None) diff --git a/tests/test_cwl_change_wf.py b/tests/test_cwl_change_wf.py index e9da32363..ebd02fdfb 100644 --- a/tests/test_cwl_change_wf.py +++ b/tests/test_cwl_change_wf.py @@ -28,15 +28,13 @@ ValueFromTransformer, ) from streamflow.workflow.step import CombinatorStep -from tests.conftest import ( - are_equals, -) +from tests.conftest import are_equals from tests.test_change_wf import ( - _persistent_id_test, - _set_val_to_attributes, _base_step_test_process, + _persistent_id_test, _set_workflow_in_combinator, _workflow_in_combinator_test, + _set_to_none, ) from tests.utils.workflow import create_workflow @@ -85,14 +83,14 @@ async def test_value_from_transformer(context: StreamFlowContext): context, test_are_eq=False, ) - _set_val_to_attributes(step, ["persistent_id", "workflow"], None) - _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) assert ( step.processor.workflow.persistent_id != new_step.processor.workflow.persistent_id ) - _set_val_to_attributes(step.processor, ["workflow"], None) - _set_val_to_attributes(new_step.processor, ["workflow"], None) + _set_to_none(step.processor, wf_to_none=True) + _set_to_none(new_step.processor, wf_to_none=True) assert are_equals(step, new_step) @@ -161,14 +159,14 @@ async def test_cwl_token_transformer(context: StreamFlowContext): context, test_are_eq=False, ) - _set_val_to_attributes(step, ["persistent_id", "workflow"], None) - 
_set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) assert ( step.processor.workflow.persistent_id != new_step.processor.workflow.persistent_id ) - _set_val_to_attributes(step.processor, ["workflow"], None) - _set_val_to_attributes(new_step.processor, ["workflow"], None) + _set_to_none(step.processor, id_to_none=True, wf_to_none=True) + _set_to_none(new_step.processor, id_to_none=True, wf_to_none=True) assert are_equals(step, new_step) @@ -271,8 +269,8 @@ async def test_list_merge_combinator(context: StreamFlowContext): ) _persistent_id_test(workflow, new_workflow, step, new_step) - _set_val_to_attributes(step, ["persistent_id", "workflow"], None) - _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) _workflow_in_combinator_test(step.combinator, new_step.combinator) _set_workflow_in_combinator(step.combinator, None) _set_workflow_in_combinator(new_step.combinator, None) @@ -301,14 +299,14 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): ) _persistent_id_test(workflow, new_workflow, step, new_step) - _set_val_to_attributes(step, ["persistent_id", "workflow"], None) - _set_val_to_attributes(new_step, ["persistent_id", "workflow"], None) + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) assert ( step.processor.workflow.persistent_id != new_step.processor.workflow.persistent_id ) - _set_val_to_attributes(step.processor, ["workflow"], None) - _set_val_to_attributes(new_step.processor, ["workflow"], None) + _set_to_none(step.processor, id_to_none=True, wf_to_none=True) + _set_to_none(new_step.processor, id_to_none=True, wf_to_none=True) assert are_equals(step, new_step) From af2ecff96f7be2545dc76798ede6e7a7f42ab5c0 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 01:32:57 +0100 Subject: [PATCH 35/69] fix --- streamflow/cwl/step.py | 4 ++-- streamflow/persistence/loading_context.py | 12 ++---------- streamflow/workflow/step.py | 17 +++++------------ 3 files changed, 9 insertions(+), 24 deletions(-) diff --git a/streamflow/cwl/step.py b/streamflow/cwl/step.py index edaf36283..cfa21f9f8 100644 --- a/streamflow/cwl/step.py +++ b/streamflow/cwl/step.py @@ -150,8 +150,8 @@ async def _load( params["skip_ports"].keys(), await asyncio.gather( *( - asyncio.create_task(loading_context.load_port(context, value)) - for value in params["skip_ports"].values() + asyncio.create_task(loading_context.load_port(context, port_id)) + for port_id in params["skip_ports"].values() ) ), ): diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index e31401181..e23e46544 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -49,11 +49,7 @@ async def load_filter(self, context: StreamFlowContext, persistent_id: int): context, persistent_id, self ) - async def load_port( - self, - context: StreamFlowContext, - persistent_id: int, - ): + async def load_port(self, context: StreamFlowContext, persistent_id: int): return self._ports.get(persistent_id) or await Port.load( context, persistent_id, self ) @@ -73,11 +69,7 @@ async def load_token(self, context: StreamFlowContext, persistent_id: int): context, persistent_id, self ) - async def load_workflow( - self, - 
context: StreamFlowContext, - persistent_id: int, - ): + async def load_workflow(self, context: StreamFlowContext, persistent_id: int): return self._workflows.get(persistent_id) or await Workflow.load( context, persistent_id, self ) diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index 33756ad26..8e79e2742 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -567,8 +567,7 @@ async def _load( name=row["name"], workflow=await loading_context.load_workflow(context, row["workflow"]), job_port=cast( - JobPort, - await loading_context.load_port(context, params["job_port"]), + JobPort, await loading_context.load_port(context, params["job_port"]) ), ) step.output_connectors = params["output_connectors"] @@ -929,8 +928,7 @@ async def _load( name=row["name"], workflow=await loading_context.load_workflow(context, row["workflow"]), job_port=cast( - JobPort, - await loading_context.load_port(context, params["job_port"]), + JobPort, await loading_context.load_port(context, params["job_port"]) ), ) @@ -1234,15 +1232,11 @@ async def _load( context, params["binding_config"], loading_context ), connector_ports={ - k: cast( - ConnectorPort, - await loading_context.load_port(context, v), - ) + k: cast(ConnectorPort, await loading_context.load_port(context, v)) for k, v in params["connector_ports"].items() }, job_port=cast( - JobPort, - await loading_context.load_port(context, params["job_port"]), + JobPort, await loading_context.load_port(context, params["job_port"]) ), job_prefix=params["job_prefix"], hardware_requirement=hardware_requirement, @@ -1518,8 +1512,7 @@ async def _load( name=row["name"], workflow=await loading_context.load_workflow(context, row["workflow"]), job_port=cast( - JobPort, - await loading_context.load_port(context, params["job_port"]), + JobPort, await loading_context.load_port(context, params["job_port"]) ), ) From b3f77c029a34ec2514398783f9fe5cbee7da460a Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 11:59:20 +0100 Subject: [PATCH 36/69] removed is_std_loading method --- streamflow/core/persistence.py | 4 ---- streamflow/core/workflow.py | 25 ++++++++++------------- streamflow/persistence/loading_context.py | 8 ++------ 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/streamflow/core/persistence.py b/streamflow/core/persistence.py index 780296628..266df82c1 100644 --- a/streamflow/core/persistence.py +++ b/streamflow/core/persistence.py @@ -13,10 +13,6 @@ class DatabaseLoadingContext(ABC): - @abstractmethod - def is_standard_loading(self) -> bool: - ... - @abstractmethod def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): ... 
diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index 4ef1be039..4b1c47526 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -311,9 +311,7 @@ async def load( row = await context.database.get_port(persistent_id) type = cast(Type[Port], utils.get_class_from_name(row["type"])) port = await type._load(context, row, loading_context) - if loading_context.is_standard_loading(): - port.persistent_id = persistent_id - loading_context.add_port(persistent_id, port) + loading_context.add_port(persistent_id, port) return port def put(self, token: Token): @@ -434,29 +432,28 @@ async def load( row = await context.database.get_step(persistent_id) type = cast(Type[Step], utils.get_class_from_name(row["type"])) step = await type._load(context, row, loading_context) - if loading_context.is_standard_loading(): - step.persistent_id = persistent_id - step.status = Status(row["status"]) - step.terminated = step.status in [ - Status.COMPLETED, - Status.FAILED, - Status.SKIPPED, - ] input_deps = await context.database.get_input_ports(persistent_id) + loading_context.add_step(persistent_id, step) step.input_ports = await load_dependencies( input_deps, - loading_context.is_standard_loading(), + step.persistent_id is None, context, loading_context, ) output_deps = await context.database.get_output_ports(persistent_id) step.output_ports = await load_dependencies( output_deps, - loading_context.is_standard_loading(), + step.persistent_id is None, context, loading_context, ) - loading_context.add_step(persistent_id, step) + if step.persistent_id: + step.status = Status(row["status"]) + step.terminated = step.status in [ + Status.COMPLETED, + Status.FAILED, + Status.SKIPPED, + ] return step @abstractmethod diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index e23e46544..173387a68 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -25,9 +25,11 @@ def add_filter(self, persistent_id: int, filter_config: FilterConfig): self._filter_configs[persistent_id] = filter_config def add_port(self, persistent_id: int, port: Port): + port.persistent_id = persistent_id self._ports[persistent_id] = port def add_step(self, persistent_id: int, step: Step): + step.persistent_id = persistent_id self._steps[persistent_id] = step def add_target(self, persistent_id: int, target: Target): @@ -74,9 +76,6 @@ async def load_workflow(self, context: StreamFlowContext, persistent_id: int): context, persistent_id, self ) - def is_standard_loading(self) -> bool: - return True - class WorkflowLoader(DatabaseLoadingContext): def __init__(self, workflow: Workflow): @@ -84,9 +83,6 @@ def __init__(self, workflow: Workflow): self.workflow: Workflow = workflow self._tokens: MutableMapping[int, Token] = {} - def is_standard_loading(self) -> bool: - return False - def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): ... 
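A rough sketch of how the two loading contexts diverge after this commit (everything below is illustrative and not part of the patch series: ``step_id`` and the wrapper function are hypothetical, and import paths follow the repository layout shown in the diffs). ``DefaultDatabaseLoadingContext.add_step`` now stamps the ``persistent_id`` on the loaded entity, while the ``WorkflowLoader`` overrides leave it unset, which is what the ``step.persistent_id is None`` checks in ``Step.load`` rely on::

    from streamflow.core import utils
    from streamflow.core.workflow import Workflow
    from streamflow.persistence.loading_context import (
        DefaultDatabaseLoadingContext,
        WorkflowLoader,
    )

    async def clone_step_into_new_workflow(context, step_id):
        # Standard loading: the default context caches the Step and stamps
        # its persistent_id through add_step().
        step = await DefaultDatabaseLoadingContext().load_step(context, step_id)
        assert step.persistent_id == step_id

        # Loading into a new workflow: WorkflowLoader.add_step() is a no-op,
        # so the clone has no persistent_id and can be saved as a new entity.
        new_workflow = Workflow(
            context=context, type="cwl", name=utils.random_name(), config={}
        )
        clone = await WorkflowLoader(workflow=new_workflow).load_step(context, step_id)
        assert clone.persistent_id is None
        return new_workflow, clone
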
From dfd2819ec43e1227f31bc7157ece61f899df2c05 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 11:59:40 +0100 Subject: [PATCH 37/69] renamed test files --- tests/{test_change_wf.py => test_build_wf.py} | 0 tests/{test_cwl_change_wf.py => test_cwl_build_wf.py} | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/{test_change_wf.py => test_build_wf.py} (100%) rename tests/{test_cwl_change_wf.py => test_cwl_build_wf.py} (99%) diff --git a/tests/test_change_wf.py b/tests/test_build_wf.py similarity index 100% rename from tests/test_change_wf.py rename to tests/test_build_wf.py diff --git a/tests/test_cwl_change_wf.py b/tests/test_cwl_build_wf.py similarity index 99% rename from tests/test_cwl_change_wf.py rename to tests/test_cwl_build_wf.py index ebd02fdfb..9bdbf4b72 100644 --- a/tests/test_cwl_change_wf.py +++ b/tests/test_cwl_build_wf.py @@ -29,7 +29,7 @@ ) from streamflow.workflow.step import CombinatorStep from tests.conftest import are_equals -from tests.test_change_wf import ( +from tests.test_build_wf import ( _base_step_test_process, _persistent_id_test, _set_workflow_in_combinator, _workflow_in_combinator_test, From 1e950ae3381f11b5d5fc14fd80a1b6475ba8f898 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 16:14:12 +0100 Subject: [PATCH 38/69] moved persistent_id assignment inside DatabaseLoadingContext --- streamflow/core/deployment.py | 3 -- streamflow/core/persistence.py | 6 +-- streamflow/core/workflow.py | 2 - streamflow/persistence/loading_context.py | 51 +++++++---------------- 4 files changed, 17 insertions(+), 45 deletions(-) diff --git a/streamflow/core/deployment.py b/streamflow/core/deployment.py index ee3598b2d..dec909bb6 100644 --- a/streamflow/core/deployment.py +++ b/streamflow/core/deployment.py @@ -206,7 +206,6 @@ async def load( lazy=row["lazy"], workdir=row["workdir"], ) - obj.persistent_id = persistent_id loading_context.add_deployment(persistent_id, obj) return obj @@ -276,7 +275,6 @@ async def load( row = await context.database.get_target(persistent_id) type = cast(Type[Target], utils.get_class_from_name(row["type"])) obj = await type._load(context, row, loading_context) - obj.persistent_id = persistent_id loading_context.add_target(persistent_id, obj) return obj @@ -338,7 +336,6 @@ async def load( type=row["type"], config=json.loads(row["config"]), ) - obj.persistent_id = persistent_id loading_context.add_filter(persistent_id, obj) return obj diff --git a/streamflow/core/persistence.py b/streamflow/core/persistence.py index 266df82c1..180784a4a 100644 --- a/streamflow/core/persistence.py +++ b/streamflow/core/persistence.py @@ -70,11 +70,7 @@ async def load_token(self, context: StreamFlowContext, persistent_id: int): ... @abstractmethod - async def load_workflow( - self, - context: StreamFlowContext, - persistent_id: int, - ): + async def load_workflow(self, context: StreamFlowContext, persistent_id: int): ... 
diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index 4b1c47526..2d4464045 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -545,7 +545,6 @@ async def load( row = await context.database.get_token(persistent_id) type = cast(Type[Token], utils.get_class_from_name(row["type"])) token = await type._load(context, row, loading_context) - token.persistent_id = persistent_id loading_context.add_token(persistent_id, token) return token @@ -672,7 +671,6 @@ async def load( workflow = cls( context=context, type=row["type"], config=params["config"], name=row["name"] ) - workflow.persistent_id = row["id"] loading_context.add_workflow(persistent_id, workflow) rows = await context.database.get_workflow_ports(persistent_id) workflow.ports = { diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 173387a68..441d05603 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -19,9 +19,11 @@ def __init__(self): self._workflows: MutableMapping[int, Workflow] = {} def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): + deployment.persistent_id = persistent_id self._deployment_configs[persistent_id] = deployment def add_filter(self, persistent_id: int, filter_config: FilterConfig): + filter_config.persistent_id = persistent_id self._filter_configs[persistent_id] = filter_config def add_port(self, persistent_id: int, port: Port): @@ -33,12 +35,15 @@ def add_step(self, persistent_id: int, step: Step): self._steps[persistent_id] = step def add_target(self, persistent_id: int, target: Target): + target.persistent_id = persistent_id self._targets[persistent_id] = target def add_token(self, persistent_id: int, token: Token): + token.persistent_id = persistent_id self._tokens[persistent_id] = token def add_workflow(self, persistent_id: int, workflow: Workflow): + workflow.persistent_id = persistent_id self._workflows[persistent_id] = workflow async def load_deployment(self, context: StreamFlowContext, persistent_id: int): @@ -77,17 +82,10 @@ async def load_workflow(self, context: StreamFlowContext, persistent_id: int): ) -class WorkflowLoader(DatabaseLoadingContext): +class WorkflowLoader(DefaultDatabaseLoadingContext): def __init__(self, workflow: Workflow): super().__init__() self.workflow: Workflow = workflow - self._tokens: MutableMapping[int, Token] = {} - - def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): - ... - - def add_filter(self, persistent_id: int, filter_config: FilterConfig): - ... def add_port(self, persistent_id: int, port: Port): ... @@ -95,26 +93,20 @@ def add_port(self, persistent_id: int, port: Port): def add_step(self, persistent_id: int, step: Step): ... - def add_target(self, persistent_id: int, target: Target): - ... - - def add_token(self, persistent_id: int, token: Token): - self._tokens[persistent_id] = token - def add_workflow(self, persistent_id: int, workflow: Workflow): ... 
- async def load_deployment(self, context: StreamFlowContext, persistent_id: int): - return await DeploymentConfig.load(context, persistent_id, self) - - async def load_filter(self, context: StreamFlowContext, persistent_id: int): - return await FilterConfig.load(context, persistent_id, self) + async def load_step(self, context: StreamFlowContext, persistent_id: int): + port_row = await context.database.get_step(persistent_id) + if port := self.workflow.ports.get(port_row["name"]): + return port + else: + # If the port is not available in the new workflow, a new one must be created + port = await Port.load(context, persistent_id, self) + self.workflow.ports[port.name] = port + return port - async def load_port( - self, - context: StreamFlowContext, - persistent_id: int, - ): + async def load_port(self, context: StreamFlowContext, persistent_id: int): port_row = await context.database.get_port(persistent_id) if port := self.workflow.ports.get(port_row["name"]): return port @@ -124,17 +116,6 @@ async def load_port( self.workflow.ports[port.name] = port return port - async def load_step(self, context: StreamFlowContext, persistent_id: int): - return await Step.load(context, persistent_id, self) - - async def load_target(self, context: StreamFlowContext, persistent_id: int): - return await Target.load(context, persistent_id, self) - - async def load_token(self, context: StreamFlowContext, persistent_id: int): - return self._tokens.get(persistent_id) or await Token.load( - context, persistent_id, self - ) - async def load_workflow( self, context: StreamFlowContext, From 05973a4af97efb8a459fc1f2ed4befb676ce607d Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 16:15:32 +0100 Subject: [PATCH 39/69] fixed signatures --- streamflow/core/persistence.py | 6 +----- streamflow/persistence/loading_context.py | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/streamflow/core/persistence.py b/streamflow/core/persistence.py index 180784a4a..2fd0b9ff3 100644 --- a/streamflow/core/persistence.py +++ b/streamflow/core/persistence.py @@ -50,11 +50,7 @@ async def load_filter(self, context: StreamFlowContext, persistent_id: int): ... @abstractmethod - async def load_port( - self, - context: StreamFlowContext, - persistent_id: int, - ): + async def load_port(self, context: StreamFlowContext, persistent_id: int): ... 
@abstractmethod diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 441d05603..0d019ab70 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -116,9 +116,5 @@ async def load_port(self, context: StreamFlowContext, persistent_id: int): self.workflow.ports[port.name] = port return port - async def load_workflow( - self, - context: StreamFlowContext, - persistent_id: int, - ): + async def load_workflow(self, context: StreamFlowContext, persistent_id: int): return self.workflow From 69ad6373100d96ede0b24a23cd6311075714e9a7 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 16:44:12 +0100 Subject: [PATCH 40/69] fix --- streamflow/persistence/loading_context.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 0d019ab70..885c13891 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -97,24 +97,22 @@ def add_workflow(self, persistent_id: int, workflow: Workflow): ... async def load_step(self, context: StreamFlowContext, persistent_id: int): - port_row = await context.database.get_step(persistent_id) - if port := self.workflow.ports.get(port_row["name"]): - return port - else: - # If the port is not available in the new workflow, a new one must be created - port = await Port.load(context, persistent_id, self) - self.workflow.ports[port.name] = port - return port + step_row = await context.database.get_step(persistent_id) + step = self.workflow.steps.get(step_row["name"]) + if step is None: + # If the step is not available in the new workflow, a new one must be created + step = await Step.load(context, persistent_id, self) + self.workflow.steps[step.name] = step + return step async def load_port(self, context: StreamFlowContext, persistent_id: int): port_row = await context.database.get_port(persistent_id) - if port := self.workflow.ports.get(port_row["name"]): - return port - else: + port = self.workflow.ports.get(port_row["name"]) + if port is None: # If the port is not available in the new workflow, a new one must be created port = await Port.load(context, persistent_id, self) self.workflow.ports[port.name] = port - return port + return port async def load_workflow(self, context: StreamFlowContext, persistent_id: int): return self.workflow From cfb39ea2754ec1beb46ee6120966270b111259f1 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 18:34:39 +0100 Subject: [PATCH 41/69] added doc --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 86 +++++++++++++++++++++++++++++++-- 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index ae7e2cb76..909b3b0c6 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "caaa02b3477bf264a667c1684176ecf3f5e03e6c916d3107299c33670506239c" + CHECKSUM: "eddc9f6a1b78209950f4a9e5bac76625c72830581c4245cc6f0269a1dad221ff" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index f31045b23..18a018cfa 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -23,9 +23,12 @@ StreamFlow relies on a 
persistent ``Database`` to store all the metadata regardi ) -> None: ... -Each ``PersistableEntity`` is identified by a unique numerical ``persistent_id`` related to the corresponding ``Database`` record. Two methods, ``save`` and ``load``, allow persisting the entity in the ``Database`` and retrieving it from the persistent record. Note that ``load`` is a class method, as it must construct a new instance. +Each ``PersistableEntity`` is identified by a unique numerical ``persistent_id`` related to the corresponding ``Database`` record. Two methods, ``save`` and ``load``, allow persisting the entity in the ``Database`` and retrieving it from the persistent record. Note that ``load`` is a class method, as it must construct a new instance, furthermore it does not assign the ``persistent_id``. -The ``load`` method receives three input parameters: the current execution ``context``, the ``persistent_id`` of the instance that should be loaded, and a ``loading_context``. The latter keeps track of all the objects already loaded in the current transaction, serving as a cache to efficiently load nested entities and prevent deadlocks when dealing with circular references. +The ``load`` method receives three input parameters: the current execution ``context``, the ``persistent_id`` of the instance that should be loaded, and a ``loading_context`` (see :ref:`DatabaseLoadingContext `). + +Database +======== The ``Database`` interface, defined in the ``streamflow.core.persistence`` module, contains all the methods to create, modify, and retrieve this metadata. Data deletion is unnecessary, as StreamFlow never removes existing records. Internally, the ``save`` and ``load`` methods call one or more of these methods to perform the desired operations. @@ -230,8 +233,9 @@ Each ``get_data`` method receives in input the identifier (commonly the ``persis The ``close`` method receives no input parameter and does not return anything. It frees stateful resources potentially allocated during the object’s lifetime, e.g., network or database connections. + Implementations -=============== +--------------- ====== ============================================ Type Class @@ -247,3 +251,79 @@ The database schema is structured as follows: .. literalinclude:: ../../../streamflow/persistence/schemas/sqlite.sql :language: sql + + +DatabaseLoadingContext +====================== +Workflow loading can be costly in term of time and memory but also tricky, with the possibility of deadlock. +The ``DatabaseLoadingContext`` supplies an interface, which allows to define classes that manage these problems. Good practice is to load the objects from these classes instead of use directly the entity ``load`` methods. + +.. code-block:: python + def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): + ... + + def add_filter(self, persistent_id: int, filter_config: FilterConfig): + ... + + def add_port(self, persistent_id: int, port: Port): + ... + + def add_step(self, persistent_id: int, step: Step): + ... + + def add_target(self, persistent_id: int, target: Target): + ... + + def add_token(self, persistent_id: int, token: Token): + ... + + def add_workflow(self, persistent_id: int, workflow: Workflow): + ... + + async def load_deployment(self, context: StreamFlowContext, persistent_id: int): + ... + + async def load_filter(self, context: StreamFlowContext, persistent_id: int): + ... + + async def load_port(self, context: StreamFlowContext, persistent_id: int): + ... 
+ + async def load_step(self, context: StreamFlowContext, persistent_id: int): + ... + + async def load_target(self, context: StreamFlowContext, persistent_id: int): + ... + + async def load_token(self, context: StreamFlowContext, persistent_id: int): + ... + + async def load_workflow(self, context: StreamFlowContext, persistent_id: int): + ... + + +Implementations +--------------- + +==================================================================== ============================================================= +Name Class +==================================================================== ============================================================= +:ref:`DefaultDatabaseLoadingContext ` streamflow.persistent.loading_context.DefaultDatabaseLoadingContext +:ref:`WorkflowLoader ` streamflow.persistent.loading_context.WorkflowLoader +==================================================================== ============================================================= + +DefaultDatabaseLoadingContext +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The ``DefaultDatabaseLoadingContext`` keeps track of all the objects already loaded in the current transaction, serving as a cache to efficiently load nested entities and prevent deadlocks when dealing with circular references. +Furthermore, it is in charge of assign the ``persistent_id`` when an entity is added in the cache with the ``add_deployment``, ``add_filter``, ``add_port``, ``add_step``, ``add_target``, ``add_token``, ``add_workflow`` methods. + + +WorkflowLoader +^^^^^^^^^^^^^^ +The ``WorkflowLoader`` allows the loading of the steps and ports of a workflow in a new one. +This feature can be helpful for the Fault Tolerance and the Resume features (see :ref:`Fault Tolerance `). +Between the workflows, it is possible to have some shared entities, particularly those used only in reading, for example ``deployment```, ``target``. Instead, the entities with an internal state must be different instances, so ``steps``, ``ports`` and ``workflow``. +This is done by loading the entity, keeping the ``persistent_id``in the case of a shared object, or creating a new ``persistent_id`` otherwise. +The ``WorkflowLoader`` extends the ``DefaultDatabaseLoadingContext`` class and overrides only the methods involving the ``step``, ``port`` and ``workflow`` entities. +The class has the ``workflow`` attribute, i.e., the new ``workflow`` instance, and the ``load_workflow`` method returns it. +Instead, the ``add_step`` and ``add_port`` methods do not set the ``persistent_id`` as their parent methods. From 17711c8e599c3159dbd684b8ff6afa2a67bbd3b5 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 18:37:30 +0100 Subject: [PATCH 42/69] change doc --- docs/source/ext/database.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 18a018cfa..abafeed11 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -255,8 +255,8 @@ The database schema is structured as follows: DatabaseLoadingContext ====================== -Workflow loading can be costly in term of time and memory but also tricky, with the possibility of deadlock. -The ``DatabaseLoadingContext`` supplies an interface, which allows to define classes that manage these problems. Good practice is to load the objects from these classes instead of use directly the entity ``load`` methods. +Workflow loading can be costly in terms of time and memory but also tricky, with the possibility of deadlock. 
+The ``DatabaseLoadingContext`` interface allows to define classes that manage these problems. Good practice is to load the objects from these classes instead of using directly the entity ``load`` methods. .. code-block:: python def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): From 078eaeb80c5cf738629a4f9f8c3c8bd8488f8568 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 18:38:00 +0100 Subject: [PATCH 43/69] checksum --- .github/workflows/ci-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 909b3b0c6..069e886cf 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "eddc9f6a1b78209950f4a9e5bac76625c72830581c4245cc6f0269a1dad221ff" + CHECKSUM: "f695f8db786d4d3845d5f2dc2ba3da9a3814a01b89b609198331b9cd116a7701" run: | cd docs/ HASH="$(make checksum | tail -n1)" From 69e7ce6b7fd4a66d3beb8a816b3495e85481fe48 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 18:58:13 +0100 Subject: [PATCH 44/69] fix --- tests/test_cwl_provenance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cwl_provenance.py b/tests/test_cwl_provenance.py index c928a949f..72f1e12f4 100644 --- a/tests/test_cwl_provenance.py +++ b/tests/test_cwl_provenance.py @@ -13,10 +13,10 @@ CWLConditionalStep, CWLEmptyScatterConditionalStep, CWLInputInjectorStep, - CWLLoopOutputAllStep, - CWLTransferStep, CWLLoopConditionalStep, + CWLLoopOutputAllStep, CWLLoopOutputLastStep, + CWLTransferStep, ) from streamflow.cwl.transformer import ( AllNonNullTransformer, From 4c9622db67b6adaf8ad33749ca44992ae7116ab7 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 18:58:25 +0100 Subject: [PATCH 45/69] fix --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 24 +++++++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 069e886cf..17acdcb4f 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "f695f8db786d4d3845d5f2dc2ba3da9a3814a01b89b609198331b9cd116a7701" + CHECKSUM: "e4564c457ec1b2d8290f26a1adc0548d10901d8233b8fef8956f6e363f9b212b" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index abafeed11..9312087f1 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -259,45 +259,59 @@ Workflow loading can be costly in terms of time and memory but also tricky, with The ``DatabaseLoadingContext`` interface allows to define classes that manage these problems. Good practice is to load the objects from these classes instead of using directly the entity ``load`` methods. .. code-block:: python + @abstractmethod def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): ... + @abstractmethod def add_filter(self, persistent_id: int, filter_config: FilterConfig): ... + @abstractmethod def add_port(self, persistent_id: int, port: Port): ... + @abstractmethod def add_step(self, persistent_id: int, step: Step): ... 
+ @abstractmethod def add_target(self, persistent_id: int, target: Target): ... + @abstractmethod def add_token(self, persistent_id: int, token: Token): ... + @abstractmethod def add_workflow(self, persistent_id: int, workflow: Workflow): ... + @abstractmethod async def load_deployment(self, context: StreamFlowContext, persistent_id: int): ... + @abstractmethod async def load_filter(self, context: StreamFlowContext, persistent_id: int): ... + @abstractmethod async def load_port(self, context: StreamFlowContext, persistent_id: int): ... + @abstractmethod async def load_step(self, context: StreamFlowContext, persistent_id: int): ... + @abstractmethod async def load_target(self, context: StreamFlowContext, persistent_id: int): ... + @abstractmethod async def load_token(self, context: StreamFlowContext, persistent_id: int): ... + @abstractmethod async def load_workflow(self, context: StreamFlowContext, persistent_id: int): ... @@ -322,8 +336,12 @@ WorkflowLoader ^^^^^^^^^^^^^^ The ``WorkflowLoader`` allows the loading of the steps and ports of a workflow in a new one. This feature can be helpful for the Fault Tolerance and the Resume features (see :ref:`Fault Tolerance `). -Between the workflows, it is possible to have some shared entities, particularly those used only in reading, for example ``deployment```, ``target``. Instead, the entities with an internal state must be different instances, so ``steps``, ``ports`` and ``workflow``. -This is done by loading the entity, keeping the ``persistent_id``in the case of a shared object, or creating a new ``persistent_id`` otherwise. +Between the workflows, it is possible to have some shared entities, particularly those used only in reading, for example ``deployment``` and ``target``. Instead, the entities with an internal state must be different instances, so ``steps``, ``ports`` and ``workflow``. +This is done by loading the entity, keeping the ``persistent_id`` in the case of a shared object, or creating a new ``persistent_id`` otherwise. The ``WorkflowLoader`` extends the ``DefaultDatabaseLoadingContext`` class and overrides only the methods involving the ``step``, ``port`` and ``workflow`` entities. The class has the ``workflow`` attribute, i.e., the new ``workflow`` instance, and the ``load_workflow`` method returns it. -Instead, the ``add_step`` and ``add_port`` methods do not set the ``persistent_id`` as their parent methods. +Instead, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set the ``persistent_id`` as their parent methods. +.. 
code-block:: python + def __init__(self, workflow: Workflow): + super().__init__() + self.workflow: Workflow = workflow From 47a9679558542b0ebae02529b5b72f1f61c4ecc7 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 19:06:34 +0100 Subject: [PATCH 46/69] fix sphinx error --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 17acdcb4f..183e5a199 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "e4564c457ec1b2d8290f26a1adc0548d10901d8233b8fef8956f6e363f9b212b" + CHECKSUM: "3c7e80d32fcbc53bb3062d2bb12060fe22b2498bc47c33eb037b7876c004dcc8" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 9312087f1..152077089 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -341,7 +341,9 @@ This is done by loading the entity, keeping the ``persistent_id`` in the case of The ``WorkflowLoader`` extends the ``DefaultDatabaseLoadingContext`` class and overrides only the methods involving the ``step``, ``port`` and ``workflow`` entities. The class has the ``workflow`` attribute, i.e., the new ``workflow`` instance, and the ``load_workflow`` method returns it. Instead, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set the ``persistent_id`` as their parent methods. + .. code-block:: python + def __init__(self, workflow: Workflow): super().__init__() self.workflow: Workflow = workflow From 7341a2fdc6d9c534284b092c29131691da6cfdf4 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 19:25:22 +0100 Subject: [PATCH 47/69] fix sphinx error ... again --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 183e5a199..1fca507f3 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "3c7e80d32fcbc53bb3062d2bb12060fe22b2498bc47c33eb037b7876c004dcc8" + CHECKSUM: "9928f145446f60c76fc86b840c5630a66264a6cacde5ba28f36679ff743fb797" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 152077089..117c475b6 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -259,6 +259,7 @@ Workflow loading can be costly in terms of time and memory but also tricky, with The ``DatabaseLoadingContext`` interface allows to define classes that manage these problems. Good practice is to load the objects from these classes instead of using directly the entity ``load`` methods. .. code-block:: python + @abstractmethod def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): ... 
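The usage pattern that the documentation patched in above describes is the one exercised by the ``_clone_step`` helper in ``tests/test_build_wf.py``; condensed into a sketch (``context``, ``step`` and ``workflow`` are assumed to come from an already persisted workflow, as in the tests)::

    new_workflow = Workflow(
        context=context, type="cwl", name=utils.random_name(), config={}
    )
    loading_context = WorkflowLoader(workflow=new_workflow)
    new_step = await loading_context.load_step(context, step.persistent_id)
    new_workflow.steps[new_step.name] = new_step

    # As in the test helper, the original workflow's ports are loaded into the
    # new one explicitly before saving it as a fresh set of database rows.
    for port in workflow.ports.values():
        new_port = await loading_context.load_port(context, port.persistent_id)
        new_workflow.ports[new_port.name] = new_port
    await new_workflow.save(context)
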
From 99259502d9732a7ee777c675ba7f6d5ca80f7f28 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 19:56:32 +0100 Subject: [PATCH 48/69] removed duplicate name --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 1fca507f3..1f7a747de 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "9928f145446f60c76fc86b840c5630a66264a6cacde5ba28f36679ff743fb797" + CHECKSUM: "f9d19e96da56a0460a248feb9e946f5f28ea4a8de1c32345e4e60e14f25beb96" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 117c475b6..35b5cf28f 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -27,8 +27,8 @@ Each ``PersistableEntity`` is identified by a unique numerical ``persistent_id`` The ``load`` method receives three input parameters: the current execution ``context``, the ``persistent_id`` of the instance that should be loaded, and a ``loading_context`` (see :ref:`DatabaseLoadingContext `). -Database -======== +Persistence +=========== The ``Database`` interface, defined in the ``streamflow.core.persistence`` module, contains all the methods to create, modify, and retrieve this metadata. Data deletion is unnecessary, as StreamFlow never removes existing records. Internally, the ``save`` and ``load`` methods call one or more of these methods to perform the desired operations. @@ -348,3 +348,4 @@ Instead, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set def __init__(self, workflow: Workflow): super().__init__() self.workflow: Workflow = workflow + From 4207324df69405cae675b45b5dc7865b5938b146 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 20:04:17 +0100 Subject: [PATCH 49/69] test --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 1f7a747de..952ae3b2e 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "f9d19e96da56a0460a248feb9e946f5f28ea4a8de1c32345e4e60e14f25beb96" + CHECKSUM: "71bc031b67fa3afeefca0987fb73fd0fd9c16a85a8c40d879017c976add4af99" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 35b5cf28f..141bc2c33 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -234,8 +234,8 @@ Each ``get_data`` method receives in input the identifier (commonly the ``persis The ``close`` method receives no input parameter and does not return anything. It frees stateful resources potentially allocated during the object’s lifetime, e.g., network or database connections. -Implementations ---------------- +Implementations1 +----------------- ====== ============================================ Type Class @@ -317,8 +317,8 @@ The ``DatabaseLoadingContext`` interface allows to define classes that manage th ... 
-Implementations ---------------- +Implementations2 +----------------- ==================================================================== ============================================================= Name Class From 6188f4ba475eb199e773c90f65935f7a33d77e63 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 20:11:08 +0100 Subject: [PATCH 50/69] test 2 --- .github/workflows/ci-tests.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 952ae3b2e..ffd126895 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -151,6 +151,7 @@ jobs: run: | cd docs/ HASH="$(make checksum | tail -n1)" + echo "Docs checksum is ${HASH}" test "${HASH}" == "${CHECKSUM}" test-flux: runs-on: ubuntu-22.04 From a24d838cafbe18bc0ea7bb27f00a86a959dfc5e6 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 20:18:52 +0100 Subject: [PATCH 51/69] fix 2 --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index ffd126895..e721e3f2b 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "71bc031b67fa3afeefca0987fb73fd0fd9c16a85a8c40d879017c976add4af99" + CHECKSUM: "f9d19e96da56a0460a248feb9e946f5f28ea4a8de1c32345e4e60e14f25beb96" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 141bc2c33..35b5cf28f 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -234,8 +234,8 @@ Each ``get_data`` method receives in input the identifier (commonly the ``persis The ``close`` method receives no input parameter and does not return anything. It frees stateful resources potentially allocated during the object’s lifetime, e.g., network or database connections. -Implementations1 ------------------ +Implementations +--------------- ====== ============================================ Type Class @@ -317,8 +317,8 @@ The ``DatabaseLoadingContext`` interface allows to define classes that manage th ... 
-Implementations2 ------------------ +Implementations +--------------- ==================================================================== ============================================================= Name Class From d4bea872cb9876a4236e0614c5ddd184b5465a2e Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 20:26:55 +0100 Subject: [PATCH 52/69] test undo --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 110 ++------------------------------ 2 files changed, 5 insertions(+), 107 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index e721e3f2b..7e688fe59 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "f9d19e96da56a0460a248feb9e946f5f28ea4a8de1c32345e4e60e14f25beb96" + CHECKSUM: "e1fe7e1bf81fd1ccc9d8271547213d17a47c2bbae588709b97ff072cb9ce2a01" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 35b5cf28f..08816fb80 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -23,12 +23,9 @@ StreamFlow relies on a persistent ``Database`` to store all the metadata regardi ) -> None: ... -Each ``PersistableEntity`` is identified by a unique numerical ``persistent_id`` related to the corresponding ``Database`` record. Two methods, ``save`` and ``load``, allow persisting the entity in the ``Database`` and retrieving it from the persistent record. Note that ``load`` is a class method, as it must construct a new instance, furthermore it does not assign the ``persistent_id``. +Each ``PersistableEntity`` is identified by a unique numerical ``persistent_id`` related to the corresponding ``Database`` record. Two methods, ``save`` and ``load``, allow persisting the entity in the ``Database`` and retrieving it from the persistent record. Note that ``load`` is a class method, as it must construct a new instance. -The ``load`` method receives three input parameters: the current execution ``context``, the ``persistent_id`` of the instance that should be loaded, and a ``loading_context`` (see :ref:`DatabaseLoadingContext `). - -Persistence -=========== +The ``load`` method receives three input parameters: the current execution ``context``, the ``persistent_id`` of the instance that should be loaded, and a ``loading_context``. The latter keeps track of all the objects already loaded in the current transaction, serving as a cache to efficiently load nested entities and prevent deadlocks when dealing with circular references. The ``Database`` interface, defined in the ``streamflow.core.persistence`` module, contains all the methods to create, modify, and retrieve this metadata. Data deletion is unnecessary, as StreamFlow never removes existing records. Internally, the ``save`` and ``load`` methods call one or more of these methods to perform the desired operations. @@ -233,9 +230,8 @@ Each ``get_data`` method receives in input the identifier (commonly the ``persis The ``close`` method receives no input parameter and does not return anything. It frees stateful resources potentially allocated during the object’s lifetime, e.g., network or database connections. 
- Implementations ---------------- +=============== ====== ============================================ Type Class @@ -250,102 +246,4 @@ By default, StreamFlow uses a local ``SqliteDatabase`` instance for metadata per The database schema is structured as follows: .. literalinclude:: ../../../streamflow/persistence/schemas/sqlite.sql - :language: sql - - -DatabaseLoadingContext -====================== -Workflow loading can be costly in terms of time and memory but also tricky, with the possibility of deadlock. -The ``DatabaseLoadingContext`` interface allows to define classes that manage these problems. Good practice is to load the objects from these classes instead of using directly the entity ``load`` methods. - -.. code-block:: python - - @abstractmethod - def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): - ... - - @abstractmethod - def add_filter(self, persistent_id: int, filter_config: FilterConfig): - ... - - @abstractmethod - def add_port(self, persistent_id: int, port: Port): - ... - - @abstractmethod - def add_step(self, persistent_id: int, step: Step): - ... - - @abstractmethod - def add_target(self, persistent_id: int, target: Target): - ... - - @abstractmethod - def add_token(self, persistent_id: int, token: Token): - ... - - @abstractmethod - def add_workflow(self, persistent_id: int, workflow: Workflow): - ... - - @abstractmethod - async def load_deployment(self, context: StreamFlowContext, persistent_id: int): - ... - - @abstractmethod - async def load_filter(self, context: StreamFlowContext, persistent_id: int): - ... - - @abstractmethod - async def load_port(self, context: StreamFlowContext, persistent_id: int): - ... - - @abstractmethod - async def load_step(self, context: StreamFlowContext, persistent_id: int): - ... - - @abstractmethod - async def load_target(self, context: StreamFlowContext, persistent_id: int): - ... - - @abstractmethod - async def load_token(self, context: StreamFlowContext, persistent_id: int): - ... - - @abstractmethod - async def load_workflow(self, context: StreamFlowContext, persistent_id: int): - ... - - -Implementations ---------------- - -==================================================================== ============================================================= -Name Class -==================================================================== ============================================================= -:ref:`DefaultDatabaseLoadingContext ` streamflow.persistent.loading_context.DefaultDatabaseLoadingContext -:ref:`WorkflowLoader ` streamflow.persistent.loading_context.WorkflowLoader -==================================================================== ============================================================= - -DefaultDatabaseLoadingContext -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``DefaultDatabaseLoadingContext`` keeps track of all the objects already loaded in the current transaction, serving as a cache to efficiently load nested entities and prevent deadlocks when dealing with circular references. -Furthermore, it is in charge of assign the ``persistent_id`` when an entity is added in the cache with the ``add_deployment``, ``add_filter``, ``add_port``, ``add_step``, ``add_target``, ``add_token``, ``add_workflow`` methods. - - -WorkflowLoader -^^^^^^^^^^^^^^ -The ``WorkflowLoader`` allows the loading of the steps and ports of a workflow in a new one. -This feature can be helpful for the Fault Tolerance and the Resume features (see :ref:`Fault Tolerance `). 
-Between the workflows, it is possible to have some shared entities, particularly those used only in reading, for example ``deployment``` and ``target``. Instead, the entities with an internal state must be different instances, so ``steps``, ``ports`` and ``workflow``. -This is done by loading the entity, keeping the ``persistent_id`` in the case of a shared object, or creating a new ``persistent_id`` otherwise. -The ``WorkflowLoader`` extends the ``DefaultDatabaseLoadingContext`` class and overrides only the methods involving the ``step``, ``port`` and ``workflow`` entities. -The class has the ``workflow`` attribute, i.e., the new ``workflow`` instance, and the ``load_workflow`` method returns it. -Instead, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set the ``persistent_id`` as their parent methods. - -.. code-block:: python - - def __init__(self, workflow: Workflow): - super().__init__() - self.workflow: Workflow = workflow - + :language: sql \ No newline at end of file From 3619a0ee77fdaa4197e6173485070cf8b8c5d967 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 20:31:50 +0100 Subject: [PATCH 53/69] last try --- .github/workflows/ci-tests.yaml | 9 ++- docs/source/ext/database.rst | 110 ++++++++++++++++++++++++++++++-- 2 files changed, 114 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 7e688fe59..17ebc3d37 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,10 +147,17 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "e1fe7e1bf81fd1ccc9d8271547213d17a47c2bbae588709b97ff072cb9ce2a01" + CHECKSUM: "f9d19e96da56a0460a248feb9e946f5f28ea4a8de1c32345e4e60e14f25beb96" run: | cd docs/ HASH="$(make checksum | tail -n1)" + find build/html/ \ + -not -name 'searchindex.js' \ + -not -name '*.woff' \ + -not -name '*.woff2' \ + -type f -print0 | \ + sort -zd | \ + xargs -r0 sha256sum echo "Docs checksum is ${HASH}" test "${HASH}" == "${CHECKSUM}" test-flux: diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 08816fb80..35b5cf28f 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -23,9 +23,12 @@ StreamFlow relies on a persistent ``Database`` to store all the metadata regardi ) -> None: ... -Each ``PersistableEntity`` is identified by a unique numerical ``persistent_id`` related to the corresponding ``Database`` record. Two methods, ``save`` and ``load``, allow persisting the entity in the ``Database`` and retrieving it from the persistent record. Note that ``load`` is a class method, as it must construct a new instance. +Each ``PersistableEntity`` is identified by a unique numerical ``persistent_id`` related to the corresponding ``Database`` record. Two methods, ``save`` and ``load``, allow persisting the entity in the ``Database`` and retrieving it from the persistent record. Note that ``load`` is a class method, as it must construct a new instance, furthermore it does not assign the ``persistent_id``. -The ``load`` method receives three input parameters: the current execution ``context``, the ``persistent_id`` of the instance that should be loaded, and a ``loading_context``. The latter keeps track of all the objects already loaded in the current transaction, serving as a cache to efficiently load nested entities and prevent deadlocks when dealing with circular references. 
+The ``load`` method receives three input parameters: the current execution ``context``, the ``persistent_id`` of the instance that should be loaded, and a ``loading_context`` (see :ref:`DatabaseLoadingContext `). + +Persistence +=========== The ``Database`` interface, defined in the ``streamflow.core.persistence`` module, contains all the methods to create, modify, and retrieve this metadata. Data deletion is unnecessary, as StreamFlow never removes existing records. Internally, the ``save`` and ``load`` methods call one or more of these methods to perform the desired operations. @@ -230,8 +233,9 @@ Each ``get_data`` method receives in input the identifier (commonly the ``persis The ``close`` method receives no input parameter and does not return anything. It frees stateful resources potentially allocated during the object’s lifetime, e.g., network or database connections. + Implementations -=============== +--------------- ====== ============================================ Type Class @@ -246,4 +250,102 @@ By default, StreamFlow uses a local ``SqliteDatabase`` instance for metadata per The database schema is structured as follows: .. literalinclude:: ../../../streamflow/persistence/schemas/sqlite.sql - :language: sql \ No newline at end of file + :language: sql + + +DatabaseLoadingContext +====================== +Workflow loading can be costly in terms of time and memory but also tricky, with the possibility of deadlock. +The ``DatabaseLoadingContext`` interface allows to define classes that manage these problems. Good practice is to load the objects from these classes instead of using directly the entity ``load`` methods. + +.. code-block:: python + + @abstractmethod + def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): + ... + + @abstractmethod + def add_filter(self, persistent_id: int, filter_config: FilterConfig): + ... + + @abstractmethod + def add_port(self, persistent_id: int, port: Port): + ... + + @abstractmethod + def add_step(self, persistent_id: int, step: Step): + ... + + @abstractmethod + def add_target(self, persistent_id: int, target: Target): + ... + + @abstractmethod + def add_token(self, persistent_id: int, token: Token): + ... + + @abstractmethod + def add_workflow(self, persistent_id: int, workflow: Workflow): + ... + + @abstractmethod + async def load_deployment(self, context: StreamFlowContext, persistent_id: int): + ... + + @abstractmethod + async def load_filter(self, context: StreamFlowContext, persistent_id: int): + ... + + @abstractmethod + async def load_port(self, context: StreamFlowContext, persistent_id: int): + ... + + @abstractmethod + async def load_step(self, context: StreamFlowContext, persistent_id: int): + ... + + @abstractmethod + async def load_target(self, context: StreamFlowContext, persistent_id: int): + ... + + @abstractmethod + async def load_token(self, context: StreamFlowContext, persistent_id: int): + ... + + @abstractmethod + async def load_workflow(self, context: StreamFlowContext, persistent_id: int): + ... 
+ + +Implementations +--------------- + +==================================================================== ============================================================= +Name Class +==================================================================== ============================================================= +:ref:`DefaultDatabaseLoadingContext ` streamflow.persistent.loading_context.DefaultDatabaseLoadingContext +:ref:`WorkflowLoader ` streamflow.persistent.loading_context.WorkflowLoader +==================================================================== ============================================================= + +DefaultDatabaseLoadingContext +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The ``DefaultDatabaseLoadingContext`` keeps track of all the objects already loaded in the current transaction, serving as a cache to efficiently load nested entities and prevent deadlocks when dealing with circular references. +Furthermore, it is in charge of assign the ``persistent_id`` when an entity is added in the cache with the ``add_deployment``, ``add_filter``, ``add_port``, ``add_step``, ``add_target``, ``add_token``, ``add_workflow`` methods. + + +WorkflowLoader +^^^^^^^^^^^^^^ +The ``WorkflowLoader`` allows the loading of the steps and ports of a workflow in a new one. +This feature can be helpful for the Fault Tolerance and the Resume features (see :ref:`Fault Tolerance `). +Between the workflows, it is possible to have some shared entities, particularly those used only in reading, for example ``deployment``` and ``target``. Instead, the entities with an internal state must be different instances, so ``steps``, ``ports`` and ``workflow``. +This is done by loading the entity, keeping the ``persistent_id`` in the case of a shared object, or creating a new ``persistent_id`` otherwise. +The ``WorkflowLoader`` extends the ``DefaultDatabaseLoadingContext`` class and overrides only the methods involving the ``step``, ``port`` and ``workflow`` entities. +The class has the ``workflow`` attribute, i.e., the new ``workflow`` instance, and the ``load_workflow`` method returns it. +Instead, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set the ``persistent_id`` as their parent methods. + +.. 
code-block:: python + + def __init__(self, workflow: Workflow): + super().__init__() + self.workflow: Workflow = workflow + From 86382e1676c4c30603fec7aa5904b66d42c41a75 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 22:31:02 +0100 Subject: [PATCH 54/69] test --- .github/workflows/ci-tests.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 17ebc3d37..f322d2ad8 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -158,6 +158,7 @@ jobs: -type f -print0 | \ sort -zd | \ xargs -r0 sha256sum + cat build/html/ext/plugins.html echo "Docs checksum is ${HASH}" test "${HASH}" == "${CHECKSUM}" test-flux: From 7ff6124a70ca55cb3ab9660af4f214eb520f3968 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 6 Jan 2024 22:41:54 +0100 Subject: [PATCH 55/69] resolved checksum --- .github/workflows/ci-tests.yaml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index f322d2ad8..68973259e 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,18 +147,10 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "f9d19e96da56a0460a248feb9e946f5f28ea4a8de1c32345e4e60e14f25beb96" + CHECKSUM: "b6317351be9e647112776f7c462400c2be7991110b4c46eaa7c49d04774cb22e" run: | cd docs/ HASH="$(make checksum | tail -n1)" - find build/html/ \ - -not -name 'searchindex.js' \ - -not -name '*.woff' \ - -not -name '*.woff2' \ - -type f -print0 | \ - sort -zd | \ - xargs -r0 sha256sum - cat build/html/ext/plugins.html echo "Docs checksum is ${HASH}" test "${HASH}" == "${CHECKSUM}" test-flux: From 0b80a59c039ef289de0b9c97457a1b25a5d885d5 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sun, 7 Jan 2024 20:07:00 +0100 Subject: [PATCH 56/69] renamed WorkflowLoader class into WorkflowBuilder --- streamflow/persistence/loading_context.py | 2 +- tests/test_build_wf.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 885c13891..77ca7f5f0 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -82,7 +82,7 @@ async def load_workflow(self, context: StreamFlowContext, persistent_id: int): ) -class WorkflowLoader(DefaultDatabaseLoadingContext): +class WorkflowBuilder(DefaultDatabaseLoadingContext): def __init__(self, workflow: Workflow): super().__init__() self.workflow: Workflow = workflow diff --git a/tests/test_build_wf.py b/tests/test_build_wf.py index 465fc07a9..db9e8ec20 100644 --- a/tests/test_build_wf.py +++ b/tests/test_build_wf.py @@ -9,7 +9,7 @@ from streamflow.core.workflow import Workflow, Port, Step from streamflow.cwl.command import CWLCommand, CWLCommandToken from streamflow.cwl.translator import _create_command_output_processor_base -from streamflow.persistence.loading_context import WorkflowLoader +from streamflow.persistence.loading_context import WorkflowBuilder from streamflow.workflow.combinator import LoopCombinator from streamflow.workflow.port import ConnectorPort, JobPort from streamflow.workflow.step import ( @@ -61,7 +61,7 @@ async def _clone_step(step, workflow, context): new_workflow = Workflow( context=context, type="cwl", name=utils.random_name(), config={} ) - loading_context = WorkflowLoader(workflow=new_workflow) + 
loading_context = WorkflowBuilder(workflow=new_workflow)
     new_step = await loading_context.load_step(context, step.persistent_id)
     new_workflow.steps[new_step.name] = new_step
 
@@ -85,7 +85,7 @@ async def _general_test_port(context: StreamFlowContext, cls_port: Type[Port]):
     new_workflow = Workflow(
         context=context, type="cwl", name=utils.random_name(), config={}
     )
-    loading_context = WorkflowLoader(workflow=new_workflow)
+    loading_context = WorkflowBuilder(workflow=new_workflow)
     new_port = await loading_context.load_port(context, port.persistent_id)
     new_workflow.ports[new_port.name] = new_port
     await new_workflow.save(context)

From e0385327a641242abaadbd0e1570a8975559b6a0 Mon Sep 17 00:00:00 2001
From: Alberto Mulone
Date: Sun, 7 Jan 2024 20:10:03 +0100
Subject: [PATCH 57/69] fix doc

---
 docs/source/ext/database.rst | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst
index 35b5cf28f..75d980738 100644
--- a/docs/source/ext/database.rst
+++ b/docs/source/ext/database.rst
@@ -324,22 +324,21 @@ Implementations
 Name Class
 ==================================================================== =============================================================
 :ref:`DefaultDatabaseLoadingContext ` streamflow.persistent.loading_context.DefaultDatabaseLoadingContext
-:ref:`WorkflowLoader ` streamflow.persistent.loading_context.WorkflowLoader
+:ref:`WorkflowBuilder ` streamflow.persistent.loading_context.WorkflowBuilder
 ==================================================================== =============================================================

 DefaultDatabaseLoadingContext
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 The ``DefaultDatabaseLoadingContext`` keeps track of all the objects already loaded in the current transaction, serving as a cache to efficiently load nested entities and prevent deadlocks when dealing with circular references.
-Furthermore, it is in charge of assign the ``persistent_id`` when an entity is added in the cache with the ``add_deployment``, ``add_filter``, ``add_port``, ``add_step``, ``add_target``, ``add_token``, ``add_workflow`` methods.
+Furthermore, it is in charge of assigning the ``persistent_id`` when an entity is added to the cache with the ``add_*`` methods.


-WorkflowLoader
+WorkflowBuilder
 ^^^^^^^^^^^^^^
-The ``WorkflowLoader`` allows the loading of the steps and ports of a workflow in a new one.
-This feature can be helpful for the Fault Tolerance and the Resume features (see :ref:`Fault Tolerance `).
+The ``WorkflowBuilder`` class loads the steps and ports of an existing workflow from the database and inserts them into a new workflow object, which is passed as an argument to the constructor.
 Between the workflows, it is possible to have some shared entities, particularly those used only in reading, for example ``deployment``` and ``target``. Instead, the entities with an internal state must be different instances, so ``steps``, ``ports`` and ``workflow``.
 This is done by loading the entity, keeping the ``persistent_id`` in the case of a shared object, or creating a new ``persistent_id`` otherwise.
-The ``WorkflowLoader`` extends the ``DefaultDatabaseLoadingContext`` class and overrides only the methods involving the ``step``, ``port`` and ``workflow`` entities.
+The ``WorkflowBuilder`` class extends the ``DefaultDatabaseLoadingContext`` class and overrides only the methods involving the ``step``, ``port``, and ``workflow`` entities.
The class has the ``workflow`` attribute, i.e., the new ``workflow`` instance, and the ``load_workflow`` method returns it. Instead, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set the ``persistent_id`` as their parent methods. From 848d208080086242789c9eb435d0679c98c62adf Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sun, 7 Jan 2024 20:11:12 +0100 Subject: [PATCH 58/69] fix empty method notation --- streamflow/persistence/loading_context.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 77ca7f5f0..419dbcc1c 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -88,13 +88,13 @@ def __init__(self, workflow: Workflow): self.workflow: Workflow = workflow def add_port(self, persistent_id: int, port: Port): - ... + pass def add_step(self, persistent_id: int, step: Step): - ... + pass def add_workflow(self, persistent_id: int, workflow: Workflow): - ... + pass async def load_step(self, context: StreamFlowContext, persistent_id: int): step_row = await context.database.get_step(persistent_id) From f63fac8f3be60d6fc4c2ed9ad90f0ab914b7b005 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Mon, 8 Jan 2024 10:44:27 +0100 Subject: [PATCH 59/69] workflow load v1 --- streamflow/persistence/loading_context.py | 4 + tests/test_build_wf.py | 424 +++++++++++++--------- 2 files changed, 248 insertions(+), 180 deletions(-) diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 419dbcc1c..41d4ce2ec 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -116,3 +116,7 @@ async def load_port(self, context: StreamFlowContext, persistent_id: int): async def load_workflow(self, context: StreamFlowContext, persistent_id: int): return self.workflow + + async def load_full_workflow(self, context: StreamFlowContext, persistent_id: int): + await Workflow.load(context, persistent_id, self) + diff --git a/tests/test_build_wf.py b/tests/test_build_wf.py index db9e8ec20..cc2e51040 100644 --- a/tests/test_build_wf.py +++ b/tests/test_build_wf.py @@ -130,41 +130,22 @@ def _workflow_in_combinator_test(original_combinator, new_combinator): ): _workflow_in_combinator_test(original_inner, new_inner) - -@pytest.mark.asyncio -async def test_port(context: StreamFlowContext): - """Test saving Port on database and re-load it in a new Workflow""" - await _general_test_port(context, Port) - - -@pytest.mark.asyncio -async def test_job_port(context: StreamFlowContext): - """Test saving JobPort on database and re-load it in a new Workflow""" - await _general_test_port(context, JobPort) - - -@pytest.mark.asyncio -async def test_connection_port(context: StreamFlowContext): - """Test saving ConnectorPort on database and re-load it in a new Workflow""" - await _general_test_port(context, ConnectorPort) - - @pytest.mark.asyncio -async def test_execute_step(context: StreamFlowContext): - """Test saving ExecuteStep on database and re-load it in a new Workflow""" +async def test_workflow(context: StreamFlowContext): + """Test saving Workflow on database and load its elements in a new Workflow""" workflow, (job_port, in_port, out_port) = await create_workflow(context, num_port=3) in_port_name = "in-1" out_port_name = "out-1" - step = workflow.create_step( + exec_step = workflow.create_step( cls=ExecuteStep, name=utils.random_name(), job_port=cast(JobPort, 
job_port) ) - step.command = CWLCommand( - step=step, + exec_step.command = CWLCommand( + step=exec_step, base_command=["echo"], command_tokens=[CWLCommandToken(name=in_port_name, value=None)], ) - step.add_output_port( + exec_step.add_output_port( out_port_name, out_port, _create_command_output_processor_base( @@ -176,166 +157,249 @@ async def test_execute_step(context: StreamFlowContext): {"hints": {}, "requirements": {}}, ), ) - step.add_input_port(in_port_name, in_port) + exec_step.add_input_port(in_port_name, in_port) await workflow.save(context) - new_workflow, new_step = await _clone_step(step, workflow, context) - _persistent_id_test(workflow, new_workflow, step, new_step) + new_workflow = (await create_workflow(context, num_port=0))[0] + loading_context = WorkflowBuilder(new_workflow) + await loading_context.load_full_workflow(context, workflow.persistent_id) + + for step in new_workflow.steps.values(): + assert step.workflow == new_workflow + for port in new_workflow.steps.values(): + assert port.workflow == new_workflow - assert step.command.step.persistent_id != new_step.command.step.persistent_id - step.command.step = None - new_step.command.step = None for original_processor, new_processor in zip( - step.output_processors.values(), new_step.output_processors.values() + exec_step.output_processors.values(), new_workflow.steps[exec_step.name].output_processors.values() ): - assert ( - original_processor.workflow.persistent_id - != new_processor.workflow.persistent_id - ) + assert original_processor.workflow == workflow + assert new_processor.workflow == new_workflow _set_to_none(original_processor, wf_to_none=True) _set_to_none(new_processor, wf_to_none=True) - _set_to_none(step, id_to_none=True, wf_to_none=True) - _set_to_none(new_step, id_to_none=True, wf_to_none=True) - assert are_equals(step, new_step) - - -@pytest.mark.asyncio -async def test_schedule_step(context: StreamFlowContext): - """Test saving ScheduleStep on database and re-load it in a new Workflow""" - workflow = (await create_workflow(context, num_port=0))[0] - deploy_step = create_deploy_step(workflow) - nof_deployments = 2 - step = create_schedule_step( - workflow, - [deploy_step for _ in range(nof_deployments)], - BindingConfig( - targets=[LocalTarget() for _ in range(nof_deployments)], - filters=[ - FilterConfig(config={}, name=utils.random_name(), type="shuffle") - for _ in range(nof_deployments) - ], - ), - ) - await workflow.save(context) - new_workflow, new_step = await _clone_step(step, workflow, context) - _persistent_id_test(workflow, new_workflow, step, new_step) - - for original_filter, new_filter in zip( - step.binding_config.filters, new_step.binding_config.filters - ): - # Config are read-only so workflows can share the same - assert original_filter.persistent_id == new_filter.persistent_id - _set_to_none(original_filter, id_to_none=True, wf_to_none=True) - _set_to_none(new_filter, id_to_none=True, wf_to_none=True) - _set_to_none(step, id_to_none=True, wf_to_none=True) - _set_to_none(new_step, id_to_none=True, wf_to_none=True) - assert are_equals(step, new_step) - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "combinator", - [ - get_dot_combinator(), - get_cartesian_product_combinator(), - get_loop_terminator_combinator(), - get_nested_crossproduct(), - ], - ids=[ - "dot_combinator", - "cartesian_product_combinator", - "loop_termination_combinator", - "nested_crossproduct", - ], -) -async def test_combinator_step(context: StreamFlowContext, combinator: Combinator): - """Test saving 
CombinatorStep on database and re-load it in a new Workflow""" - workflow, (in_port, out_port, in_port_2, out_port_2) = await create_workflow( - context, num_port=4 - ) - _set_workflow_in_combinator(combinator, workflow) - step = workflow.create_step( - cls=CombinatorStep, - name=utils.random_name() + "-combinator", - combinator=combinator, - ) - port_name = "test" - step.add_input_port(port_name, in_port) - step.add_output_port(port_name, out_port) - - port_name_2 = f"{port_name}_2" - step.add_input_port(port_name_2, in_port_2) - step.add_output_port(port_name_2, out_port_2) - - await workflow.save(context) - new_workflow, new_step = await _clone_step(step, workflow, context) - _persistent_id_test(workflow, new_workflow, step, new_step) - - _set_to_none(step, id_to_none=True, wf_to_none=True) - _set_to_none(new_step, id_to_none=True, wf_to_none=True) - _workflow_in_combinator_test(step.combinator, new_step.combinator) - _set_workflow_in_combinator(step.combinator, None) - _set_workflow_in_combinator(new_step.combinator, None) - assert are_equals(step, new_step) - - -@pytest.mark.asyncio -async def test_loop_combinator_step(context: StreamFlowContext): - """Test saving LoopCombinatorStep on database and re-load it in a new Workflow""" - workflow, (in_port, out_port, in_port_2, out_port_2) = await create_workflow( - context, num_port=4 - ) - name = utils.random_name() - step = workflow.create_step( - cls=LoopCombinatorStep, - name=name + "-combinator", - combinator=LoopCombinator(name=name, workflow=workflow), - ) - port_name = "test" - step.add_input_port(port_name, in_port) - step.add_output_port(port_name, out_port) - - port_name_2 = f"{port_name}_2" - step.add_input_port(port_name_2, in_port_2) - step.add_output_port(port_name_2, out_port_2) - - await workflow.save(context) - new_workflow, new_step = await _clone_step(step, workflow, context) - _persistent_id_test(workflow, new_workflow, step, new_step) - - _set_to_none(step, id_to_none=True, wf_to_none=True) - _set_to_none(new_step, id_to_none=True, wf_to_none=True) - _workflow_in_combinator_test(step.combinator, new_step.combinator) - _set_workflow_in_combinator(step.combinator, None) - _set_workflow_in_combinator(new_step.combinator, None) - assert are_equals(step, new_step) - - -@pytest.mark.asyncio -async def test_deploy_step(context: StreamFlowContext): - """Test saving DeployStep on database and re-load it in a new Workflow""" - workflow = (await create_workflow(context, num_port=0))[0] - step = create_deploy_step(workflow) - await workflow.save(context) - new_workflow, new_step = await _clone_step(step, workflow, context) - _persistent_id_test(workflow, new_workflow, step, new_step) - - -@pytest.mark.asyncio -async def test_gather_step(context: StreamFlowContext): - """Test saving GatherStep on database and re-load it in a new Workflow""" - workflow = (await create_workflow(context, num_port=0))[0] - await _base_step_test_process( - workflow, - GatherStep, - {"name": utils.random_name() + "-gather", "depth": 1}, - context, - ) + # set to none some attributes in new_workflow + new_workflow.name = None + new_workflow.persistent_id = None + for new_step in new_workflow.steps.values(): + _set_to_none(new_step, id_to_none=True, wf_to_none=True) + for new_port in new_workflow.ports.values(): + _set_to_none(new_port, id_to_none=True, wf_to_none=True) -@pytest.mark.asyncio -async def test_scatter_step(context: StreamFlowContext): - """Test saving ScatterStep on database and re-load it in a new Workflow""" - workflow = (await 
create_workflow(context, num_port=0))[0] - await _base_step_test_process( - workflow, ScatterStep, {"name": utils.random_name() + "-scatter"}, context - ) + # set to none some attributes in workflow + workflow.name = None + workflow.persistent_id = None + for step in workflow.steps.values(): + _set_to_none(step, id_to_none=True, wf_to_none=True) + for port in workflow.ports.values(): + _set_to_none(port, id_to_none=True, wf_to_none=True) + assert are_equals(workflow, new_workflow) + + +# +# @pytest.mark.asyncio +# async def test_port(context: StreamFlowContext): +# """Test saving Port on database and re-load it in a new Workflow""" +# await _general_test_port(context, Port) +# +# +# @pytest.mark.asyncio +# async def test_job_port(context: StreamFlowContext): +# """Test saving JobPort on database and re-load it in a new Workflow""" +# await _general_test_port(context, JobPort) +# +# +# @pytest.mark.asyncio +# async def test_connection_port(context: StreamFlowContext): +# """Test saving ConnectorPort on database and re-load it in a new Workflow""" +# await _general_test_port(context, ConnectorPort) +# +# +# @pytest.mark.asyncio +# async def test_execute_step(context: StreamFlowContext): +# """Test saving ExecuteStep on database and re-load it in a new Workflow""" +# workflow, (job_port, in_port, out_port) = await create_workflow(context, num_port=3) +# +# in_port_name = "in-1" +# out_port_name = "out-1" +# step = workflow.create_step( +# cls=ExecuteStep, name=utils.random_name(), job_port=cast(JobPort, job_port) +# ) +# step.command = CWLCommand( +# step=step, +# base_command=["echo"], +# command_tokens=[CWLCommandToken(name=in_port_name, value=None)], +# ) +# step.add_output_port( +# out_port_name, +# out_port, +# _create_command_output_processor_base( +# out_port.name, +# workflow, +# None, +# "string", +# {}, +# {"hints": {}, "requirements": {}}, +# ), +# ) +# step.add_input_port(in_port_name, in_port) +# await workflow.save(context) +# new_workflow, new_step = await _clone_step(step, workflow, context) +# _persistent_id_test(workflow, new_workflow, step, new_step) +# +# assert step.command.step.persistent_id != new_step.command.step.persistent_id +# step.command.step = None +# new_step.command.step = None +# for original_processor, new_processor in zip( +# step.output_processors.values(), new_step.output_processors.values() +# ): +# assert ( +# original_processor.workflow.persistent_id +# != new_processor.workflow.persistent_id +# ) +# _set_to_none(original_processor, wf_to_none=True) +# _set_to_none(new_processor, wf_to_none=True) +# _set_to_none(step, id_to_none=True, wf_to_none=True) +# _set_to_none(new_step, id_to_none=True, wf_to_none=True) +# assert are_equals(step, new_step) +# +# +# @pytest.mark.asyncio +# async def test_schedule_step(context: StreamFlowContext): +# """Test saving ScheduleStep on database and re-load it in a new Workflow""" +# workflow = (await create_workflow(context, num_port=0))[0] +# deploy_step = create_deploy_step(workflow) +# nof_deployments = 2 +# step = create_schedule_step( +# workflow, +# [deploy_step for _ in range(nof_deployments)], +# BindingConfig( +# targets=[LocalTarget() for _ in range(nof_deployments)], +# filters=[ +# FilterConfig(config={}, name=utils.random_name(), type="shuffle") +# for _ in range(nof_deployments) +# ], +# ), +# ) +# await workflow.save(context) +# new_workflow, new_step = await _clone_step(step, workflow, context) +# _persistent_id_test(workflow, new_workflow, step, new_step) +# +# for original_filter, new_filter in zip( 
+# step.binding_config.filters, new_step.binding_config.filters +# ): +# # Config are read-only so workflows can share the same +# assert original_filter.persistent_id == new_filter.persistent_id +# _set_to_none(original_filter, id_to_none=True, wf_to_none=True) +# _set_to_none(new_filter, id_to_none=True, wf_to_none=True) +# _set_to_none(step, id_to_none=True, wf_to_none=True) +# _set_to_none(new_step, id_to_none=True, wf_to_none=True) +# assert are_equals(step, new_step) +# +# +# @pytest.mark.asyncio +# @pytest.mark.parametrize( +# "combinator", +# [ +# get_dot_combinator(), +# get_cartesian_product_combinator(), +# get_loop_terminator_combinator(), +# get_nested_crossproduct(), +# ], +# ids=[ +# "dot_combinator", +# "cartesian_product_combinator", +# "loop_termination_combinator", +# "nested_crossproduct", +# ], +# ) +# async def test_combinator_step(context: StreamFlowContext, combinator: Combinator): +# """Test saving CombinatorStep on database and re-load it in a new Workflow""" +# workflow, (in_port, out_port, in_port_2, out_port_2) = await create_workflow( +# context, num_port=4 +# ) +# _set_workflow_in_combinator(combinator, workflow) +# step = workflow.create_step( +# cls=CombinatorStep, +# name=utils.random_name() + "-combinator", +# combinator=combinator, +# ) +# port_name = "test" +# step.add_input_port(port_name, in_port) +# step.add_output_port(port_name, out_port) +# +# port_name_2 = f"{port_name}_2" +# step.add_input_port(port_name_2, in_port_2) +# step.add_output_port(port_name_2, out_port_2) +# +# await workflow.save(context) +# new_workflow, new_step = await _clone_step(step, workflow, context) +# _persistent_id_test(workflow, new_workflow, step, new_step) +# +# _set_to_none(step, id_to_none=True, wf_to_none=True) +# _set_to_none(new_step, id_to_none=True, wf_to_none=True) +# _workflow_in_combinator_test(step.combinator, new_step.combinator) +# _set_workflow_in_combinator(step.combinator, None) +# _set_workflow_in_combinator(new_step.combinator, None) +# assert are_equals(step, new_step) +# +# +# @pytest.mark.asyncio +# async def test_loop_combinator_step(context: StreamFlowContext): +# """Test saving LoopCombinatorStep on database and re-load it in a new Workflow""" +# workflow, (in_port, out_port, in_port_2, out_port_2) = await create_workflow( +# context, num_port=4 +# ) +# name = utils.random_name() +# step = workflow.create_step( +# cls=LoopCombinatorStep, +# name=name + "-combinator", +# combinator=LoopCombinator(name=name, workflow=workflow), +# ) +# port_name = "test" +# step.add_input_port(port_name, in_port) +# step.add_output_port(port_name, out_port) +# +# port_name_2 = f"{port_name}_2" +# step.add_input_port(port_name_2, in_port_2) +# step.add_output_port(port_name_2, out_port_2) +# +# await workflow.save(context) +# new_workflow, new_step = await _clone_step(step, workflow, context) +# _persistent_id_test(workflow, new_workflow, step, new_step) +# +# _set_to_none(step, id_to_none=True, wf_to_none=True) +# _set_to_none(new_step, id_to_none=True, wf_to_none=True) +# _workflow_in_combinator_test(step.combinator, new_step.combinator) +# _set_workflow_in_combinator(step.combinator, None) +# _set_workflow_in_combinator(new_step.combinator, None) +# assert are_equals(step, new_step) +# +# +# @pytest.mark.asyncio +# async def test_deploy_step(context: StreamFlowContext): +# """Test saving DeployStep on database and re-load it in a new Workflow""" +# workflow = (await create_workflow(context, num_port=0))[0] +# step = create_deploy_step(workflow) +# await 
workflow.save(context) +# new_workflow, new_step = await _clone_step(step, workflow, context) +# _persistent_id_test(workflow, new_workflow, step, new_step) +# +# +# @pytest.mark.asyncio +# async def test_gather_step(context: StreamFlowContext): +# """Test saving GatherStep on database and re-load it in a new Workflow""" +# workflow = (await create_workflow(context, num_port=0))[0] +# await _base_step_test_process( +# workflow, +# GatherStep, +# {"name": utils.random_name() + "-gather", "depth": 1}, +# context, +# ) +# +# +# @pytest.mark.asyncio +# async def test_scatter_step(context: StreamFlowContext): +# """Test saving ScatterStep on database and re-load it in a new Workflow""" +# workflow = (await create_workflow(context, num_port=0))[0] +# await _base_step_test_process( +# workflow, ScatterStep, {"name": utils.random_name() + "-scatter"}, context +# ) From a399f2cc9471d3b03f0d098286e1bcef9fb6f586 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Mon, 8 Jan 2024 11:41:32 +0100 Subject: [PATCH 60/69] workflow load v2 --- streamflow/persistence/loading_context.py | 11 ++++++----- tests/test_build_wf.py | 9 ++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 41d4ce2ec..27af50c73 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -83,9 +83,11 @@ async def load_workflow(self, context: StreamFlowContext, persistent_id: int): class WorkflowBuilder(DefaultDatabaseLoadingContext): - def __init__(self, workflow: Workflow): + def __init__(self, workflow: Workflow, load_entire_wf: bool = False): super().__init__() + self.load_entire_wf: bool = load_entire_wf self.workflow: Workflow = workflow + self._wf_loaded = False def add_port(self, persistent_id: int, port: Port): pass @@ -115,8 +117,7 @@ async def load_port(self, context: StreamFlowContext, persistent_id: int): return port async def load_workflow(self, context: StreamFlowContext, persistent_id: int): + if self.load_entire_wf and not self._wf_loaded: + self._wf_loaded = True + await Workflow.load(context, persistent_id, self) return self.workflow - - async def load_full_workflow(self, context: StreamFlowContext, persistent_id: int): - await Workflow.load(context, persistent_id, self) - diff --git a/tests/test_build_wf.py b/tests/test_build_wf.py index cc2e51040..2f786aec1 100644 --- a/tests/test_build_wf.py +++ b/tests/test_build_wf.py @@ -74,9 +74,7 @@ async def _clone_step(step, workflow, context): async def _general_test_port(context: StreamFlowContext, cls_port: Type[Port]): - workflow = Workflow( - context=context, type="cwl", name=utils.random_name(), config={} - ) + workflow, ports = await create_workflow(context) port = workflow.create_port(cls_port) await workflow.save(context) assert workflow.persistent_id @@ -85,10 +83,11 @@ async def _general_test_port(context: StreamFlowContext, cls_port: Type[Port]): new_workflow = Workflow( context=context, type="cwl", name=utils.random_name(), config={} ) - loading_context = WorkflowBuilder(workflow=new_workflow) + loading_context = WorkflowBuilder(new_workflow) + new_port = await loading_context.load_port(context, port.persistent_id) - new_workflow.ports[new_port.name] = new_port await new_workflow.save(context) + assert len(new_workflow.ports) == 1 _persistent_id_test(workflow, new_workflow, port, new_port) _set_to_none(port, id_to_none=True, wf_to_none=True) _set_to_none(new_port, id_to_none=True, wf_to_none=True) From 
0031e265cab5ca44bd290bcfae960abf51c3d982 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Mon, 8 Jan 2024 11:41:51 +0100 Subject: [PATCH 61/69] refactor in alphabetic order the tests --- tests/test_build_wf.py | 427 ++++++++++++++++++------------------- tests/test_cwl_build_wf.py | 226 ++++++++++---------- 2 files changed, 326 insertions(+), 327 deletions(-) diff --git a/tests/test_build_wf.py b/tests/test_build_wf.py index 2f786aec1..9aed6cad2 100644 --- a/tests/test_build_wf.py +++ b/tests/test_build_wf.py @@ -129,6 +129,209 @@ def _workflow_in_combinator_test(original_combinator, new_combinator): ): _workflow_in_combinator_test(original_inner, new_inner) + +# a b c d e f g h i j k l m n o p q r s t u v w x y z + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "combinator", + [ + get_cartesian_product_combinator(), + get_dot_combinator(), + get_loop_terminator_combinator(), + get_nested_crossproduct(), + ], + ids=[ + "cartesian_product_combinator", + "dot_combinator", + "loop_termination_combinator", + "nested_crossproduct", + ], +) +async def test_combinator_step(context: StreamFlowContext, combinator: Combinator): + """Test saving CombinatorStep on database and re-load it in a new Workflow""" + workflow, (in_port, out_port, in_port_2, out_port_2) = await create_workflow( + context, num_port=4 + ) + _set_workflow_in_combinator(combinator, workflow) + step = workflow.create_step( + cls=CombinatorStep, + name=utils.random_name() + "-combinator", + combinator=combinator, + ) + port_name = "test" + step.add_input_port(port_name, in_port) + step.add_output_port(port_name, out_port) + + port_name_2 = f"{port_name}_2" + step.add_input_port(port_name_2, in_port_2) + step.add_output_port(port_name_2, out_port_2) + + await workflow.save(context) + new_workflow, new_step = await _clone_step(step, workflow, context) + _persistent_id_test(workflow, new_workflow, step, new_step) + + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) + _workflow_in_combinator_test(step.combinator, new_step.combinator) + _set_workflow_in_combinator(step.combinator, None) + _set_workflow_in_combinator(new_step.combinator, None) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio +async def test_deploy_step(context: StreamFlowContext): + """Test saving DeployStep on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=0))[0] + step = create_deploy_step(workflow) + await workflow.save(context) + new_workflow, new_step = await _clone_step(step, workflow, context) + _persistent_id_test(workflow, new_workflow, step, new_step) + + +@pytest.mark.asyncio +async def test_execute_step(context: StreamFlowContext): + """Test saving ExecuteStep on database and re-load it in a new Workflow""" + workflow, (job_port, in_port, out_port) = await create_workflow(context, num_port=3) + + in_port_name = "in-1" + out_port_name = "out-1" + step = workflow.create_step( + cls=ExecuteStep, name=utils.random_name(), job_port=cast(JobPort, job_port) + ) + step.command = CWLCommand( + step=step, + base_command=["echo"], + command_tokens=[CWLCommandToken(name=in_port_name, value=None)], + ) + step.add_output_port( + out_port_name, + out_port, + _create_command_output_processor_base( + out_port.name, + workflow, + None, + "string", + {}, + {"hints": {}, "requirements": {}}, + ), + ) + step.add_input_port(in_port_name, in_port) + await workflow.save(context) + new_workflow, new_step = await _clone_step(step, workflow, context) + 
_persistent_id_test(workflow, new_workflow, step, new_step) + + assert step.command.step.persistent_id != new_step.command.step.persistent_id + step.command.step = None + new_step.command.step = None + for original_processor, new_processor in zip( + step.output_processors.values(), new_step.output_processors.values() + ): + assert ( + original_processor.workflow.persistent_id + != new_processor.workflow.persistent_id + ) + _set_to_none(original_processor, wf_to_none=True) + _set_to_none(new_processor, wf_to_none=True) + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio +async def test_gather_step(context: StreamFlowContext): + """Test saving GatherStep on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=0))[0] + await _base_step_test_process( + workflow, + GatherStep, + {"name": utils.random_name() + "-gather", "depth": 1}, + context, + ) + + +@pytest.mark.asyncio +async def test_loop_combinator_step(context: StreamFlowContext): + """Test saving LoopCombinatorStep on database and re-load it in a new Workflow""" + workflow, (in_port, out_port, in_port_2, out_port_2) = await create_workflow( + context, num_port=4 + ) + name = utils.random_name() + step = workflow.create_step( + cls=LoopCombinatorStep, + name=name + "-combinator", + combinator=LoopCombinator(name=name, workflow=workflow), + ) + port_name = "test" + step.add_input_port(port_name, in_port) + step.add_output_port(port_name, out_port) + + port_name_2 = f"{port_name}_2" + step.add_input_port(port_name_2, in_port_2) + step.add_output_port(port_name_2, out_port_2) + + await workflow.save(context) + new_workflow, new_step = await _clone_step(step, workflow, context) + _persistent_id_test(workflow, new_workflow, step, new_step) + + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) + _workflow_in_combinator_test(step.combinator, new_step.combinator) + _set_workflow_in_combinator(step.combinator, None) + _set_workflow_in_combinator(new_step.combinator, None) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio +async def test_scatter_step(context: StreamFlowContext): + """Test saving ScatterStep on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=0))[0] + await _base_step_test_process( + workflow, ScatterStep, {"name": utils.random_name() + "-scatter"}, context + ) + + +@pytest.mark.asyncio +async def test_schedule_step(context: StreamFlowContext): + """Test saving ScheduleStep on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=0))[0] + deploy_step = create_deploy_step(workflow) + nof_deployments = 2 + step = create_schedule_step( + workflow, + [deploy_step for _ in range(nof_deployments)], + BindingConfig( + targets=[LocalTarget() for _ in range(nof_deployments)], + filters=[ + FilterConfig(config={}, name=utils.random_name(), type="shuffle") + for _ in range(nof_deployments) + ], + ), + ) + await workflow.save(context) + new_workflow, new_step = await _clone_step(step, workflow, context) + _persistent_id_test(workflow, new_workflow, step, new_step) + + for original_filter, new_filter in zip( + step.binding_config.filters, new_step.binding_config.filters + ): + # Config are read-only so workflows can share the same + assert original_filter.persistent_id == new_filter.persistent_id + 
_set_to_none(original_filter, id_to_none=True, wf_to_none=True) + _set_to_none(new_filter, id_to_none=True, wf_to_none=True) + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) + assert are_equals(step, new_step) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("port_cls", [Port, JobPort, ConnectorPort]) +async def test_port(context: StreamFlowContext, port_cls: Type[Port]): + """Test saving Port on database and re-load it in a new Workflow""" + await _general_test_port(context, port_cls) + + @pytest.mark.asyncio async def test_workflow(context: StreamFlowContext): """Test saving Workflow on database and load its elements in a new Workflow""" @@ -158,17 +361,22 @@ async def test_workflow(context: StreamFlowContext): ) exec_step.add_input_port(in_port_name, in_port) await workflow.save(context) + new_workflow = (await create_workflow(context, num_port=0))[0] - loading_context = WorkflowBuilder(new_workflow) - await loading_context.load_full_workflow(context, workflow.persistent_id) + loading_context = WorkflowBuilder(new_workflow, load_entire_wf=True) + new_workflow = await loading_context.load_workflow(context, workflow.persistent_id) + + assert new_workflow.name != workflow.name + # test every object has the right workflow reference for step in new_workflow.steps.values(): assert step.workflow == new_workflow for port in new_workflow.steps.values(): assert port.workflow == new_workflow for original_processor, new_processor in zip( - exec_step.output_processors.values(), new_workflow.steps[exec_step.name].output_processors.values() + exec_step.output_processors.values(), + new_workflow.steps[exec_step.name].output_processors.values(), ): assert original_processor.workflow == workflow assert new_processor.workflow == new_workflow @@ -190,215 +398,6 @@ async def test_workflow(context: StreamFlowContext): _set_to_none(step, id_to_none=True, wf_to_none=True) for port in workflow.ports.values(): _set_to_none(port, id_to_none=True, wf_to_none=True) - assert are_equals(workflow, new_workflow) - -# -# @pytest.mark.asyncio -# async def test_port(context: StreamFlowContext): -# """Test saving Port on database and re-load it in a new Workflow""" -# await _general_test_port(context, Port) -# -# -# @pytest.mark.asyncio -# async def test_job_port(context: StreamFlowContext): -# """Test saving JobPort on database and re-load it in a new Workflow""" -# await _general_test_port(context, JobPort) -# -# -# @pytest.mark.asyncio -# async def test_connection_port(context: StreamFlowContext): -# """Test saving ConnectorPort on database and re-load it in a new Workflow""" -# await _general_test_port(context, ConnectorPort) -# -# -# @pytest.mark.asyncio -# async def test_execute_step(context: StreamFlowContext): -# """Test saving ExecuteStep on database and re-load it in a new Workflow""" -# workflow, (job_port, in_port, out_port) = await create_workflow(context, num_port=3) -# -# in_port_name = "in-1" -# out_port_name = "out-1" -# step = workflow.create_step( -# cls=ExecuteStep, name=utils.random_name(), job_port=cast(JobPort, job_port) -# ) -# step.command = CWLCommand( -# step=step, -# base_command=["echo"], -# command_tokens=[CWLCommandToken(name=in_port_name, value=None)], -# ) -# step.add_output_port( -# out_port_name, -# out_port, -# _create_command_output_processor_base( -# out_port.name, -# workflow, -# None, -# "string", -# {}, -# {"hints": {}, "requirements": {}}, -# ), -# ) -# step.add_input_port(in_port_name, in_port) -# await 
workflow.save(context) -# new_workflow, new_step = await _clone_step(step, workflow, context) -# _persistent_id_test(workflow, new_workflow, step, new_step) -# -# assert step.command.step.persistent_id != new_step.command.step.persistent_id -# step.command.step = None -# new_step.command.step = None -# for original_processor, new_processor in zip( -# step.output_processors.values(), new_step.output_processors.values() -# ): -# assert ( -# original_processor.workflow.persistent_id -# != new_processor.workflow.persistent_id -# ) -# _set_to_none(original_processor, wf_to_none=True) -# _set_to_none(new_processor, wf_to_none=True) -# _set_to_none(step, id_to_none=True, wf_to_none=True) -# _set_to_none(new_step, id_to_none=True, wf_to_none=True) -# assert are_equals(step, new_step) -# -# -# @pytest.mark.asyncio -# async def test_schedule_step(context: StreamFlowContext): -# """Test saving ScheduleStep on database and re-load it in a new Workflow""" -# workflow = (await create_workflow(context, num_port=0))[0] -# deploy_step = create_deploy_step(workflow) -# nof_deployments = 2 -# step = create_schedule_step( -# workflow, -# [deploy_step for _ in range(nof_deployments)], -# BindingConfig( -# targets=[LocalTarget() for _ in range(nof_deployments)], -# filters=[ -# FilterConfig(config={}, name=utils.random_name(), type="shuffle") -# for _ in range(nof_deployments) -# ], -# ), -# ) -# await workflow.save(context) -# new_workflow, new_step = await _clone_step(step, workflow, context) -# _persistent_id_test(workflow, new_workflow, step, new_step) -# -# for original_filter, new_filter in zip( -# step.binding_config.filters, new_step.binding_config.filters -# ): -# # Config are read-only so workflows can share the same -# assert original_filter.persistent_id == new_filter.persistent_id -# _set_to_none(original_filter, id_to_none=True, wf_to_none=True) -# _set_to_none(new_filter, id_to_none=True, wf_to_none=True) -# _set_to_none(step, id_to_none=True, wf_to_none=True) -# _set_to_none(new_step, id_to_none=True, wf_to_none=True) -# assert are_equals(step, new_step) -# -# -# @pytest.mark.asyncio -# @pytest.mark.parametrize( -# "combinator", -# [ -# get_dot_combinator(), -# get_cartesian_product_combinator(), -# get_loop_terminator_combinator(), -# get_nested_crossproduct(), -# ], -# ids=[ -# "dot_combinator", -# "cartesian_product_combinator", -# "loop_termination_combinator", -# "nested_crossproduct", -# ], -# ) -# async def test_combinator_step(context: StreamFlowContext, combinator: Combinator): -# """Test saving CombinatorStep on database and re-load it in a new Workflow""" -# workflow, (in_port, out_port, in_port_2, out_port_2) = await create_workflow( -# context, num_port=4 -# ) -# _set_workflow_in_combinator(combinator, workflow) -# step = workflow.create_step( -# cls=CombinatorStep, -# name=utils.random_name() + "-combinator", -# combinator=combinator, -# ) -# port_name = "test" -# step.add_input_port(port_name, in_port) -# step.add_output_port(port_name, out_port) -# -# port_name_2 = f"{port_name}_2" -# step.add_input_port(port_name_2, in_port_2) -# step.add_output_port(port_name_2, out_port_2) -# -# await workflow.save(context) -# new_workflow, new_step = await _clone_step(step, workflow, context) -# _persistent_id_test(workflow, new_workflow, step, new_step) -# -# _set_to_none(step, id_to_none=True, wf_to_none=True) -# _set_to_none(new_step, id_to_none=True, wf_to_none=True) -# _workflow_in_combinator_test(step.combinator, new_step.combinator) -# _set_workflow_in_combinator(step.combinator, None) 
-# _set_workflow_in_combinator(new_step.combinator, None) -# assert are_equals(step, new_step) -# -# -# @pytest.mark.asyncio -# async def test_loop_combinator_step(context: StreamFlowContext): -# """Test saving LoopCombinatorStep on database and re-load it in a new Workflow""" -# workflow, (in_port, out_port, in_port_2, out_port_2) = await create_workflow( -# context, num_port=4 -# ) -# name = utils.random_name() -# step = workflow.create_step( -# cls=LoopCombinatorStep, -# name=name + "-combinator", -# combinator=LoopCombinator(name=name, workflow=workflow), -# ) -# port_name = "test" -# step.add_input_port(port_name, in_port) -# step.add_output_port(port_name, out_port) -# -# port_name_2 = f"{port_name}_2" -# step.add_input_port(port_name_2, in_port_2) -# step.add_output_port(port_name_2, out_port_2) -# -# await workflow.save(context) -# new_workflow, new_step = await _clone_step(step, workflow, context) -# _persistent_id_test(workflow, new_workflow, step, new_step) -# -# _set_to_none(step, id_to_none=True, wf_to_none=True) -# _set_to_none(new_step, id_to_none=True, wf_to_none=True) -# _workflow_in_combinator_test(step.combinator, new_step.combinator) -# _set_workflow_in_combinator(step.combinator, None) -# _set_workflow_in_combinator(new_step.combinator, None) -# assert are_equals(step, new_step) -# -# -# @pytest.mark.asyncio -# async def test_deploy_step(context: StreamFlowContext): -# """Test saving DeployStep on database and re-load it in a new Workflow""" -# workflow = (await create_workflow(context, num_port=0))[0] -# step = create_deploy_step(workflow) -# await workflow.save(context) -# new_workflow, new_step = await _clone_step(step, workflow, context) -# _persistent_id_test(workflow, new_workflow, step, new_step) -# -# -# @pytest.mark.asyncio -# async def test_gather_step(context: StreamFlowContext): -# """Test saving GatherStep on database and re-load it in a new Workflow""" -# workflow = (await create_workflow(context, num_port=0))[0] -# await _base_step_test_process( -# workflow, -# GatherStep, -# {"name": utils.random_name() + "-gather", "depth": 1}, -# context, -# ) -# -# -# @pytest.mark.asyncio -# async def test_scatter_step(context: StreamFlowContext): -# """Test saving ScatterStep on database and re-load it in a new Workflow""" -# workflow = (await create_workflow(context, num_port=0))[0] -# await _base_step_test_process( -# workflow, ScatterStep, {"name": utils.random_name() + "-scatter"}, context -# ) + # test two workflows are the same (i.e. 
same steps and ports) + assert are_equals(workflow, new_workflow) diff --git a/tests/test_cwl_build_wf.py b/tests/test_cwl_build_wf.py index 9bdbf4b72..0dc9c99ba 100644 --- a/tests/test_cwl_build_wf.py +++ b/tests/test_cwl_build_wf.py @@ -40,13 +40,16 @@ @pytest.mark.asyncio -async def test_default_transformer(context: StreamFlowContext): - """Test saving DefaultTransformer on database and re-load it in a new Workflow""" - workflow, (port,) = await create_workflow(context, num_port=1) +@pytest.mark.parametrize("step_cls", [CWLLoopOutputAllStep, CWLLoopOutputLastStep]) +async def test_cwl_loop_output(context: StreamFlowContext, step_cls): + """Test saving CWLLoopOutputAllStep on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=1))[0] await _base_step_test_process( workflow, - DefaultTransformer, - {"name": utils.random_name() + "-transformer", "default_port": port}, + step_cls, + { + "name": utils.random_name() + "-loop-output", + }, context, ) @@ -64,47 +67,68 @@ async def test_default_retag_transformer(context: StreamFlowContext): @pytest.mark.asyncio -async def test_value_from_transformer(context: StreamFlowContext): - """Test saving ValueFromTransformer on database and re-load it in a new Workflow""" +async def test_default_transformer(context: StreamFlowContext): + """Test saving DefaultTransformer on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + await _base_step_test_process( + workflow, + DefaultTransformer, + {"name": utils.random_name() + "-transformer", "default_port": port}, + context, + ) + + +@pytest.mark.asyncio +async def test_forward_transformer(context: StreamFlowContext): + """Test saving ForwardTransformer on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=1))[0] + await _base_step_test_process( + workflow, + ForwardTransformer, + { + "name": utils.random_name() + "-transformer", + }, + context, + ) + + +@pytest.mark.asyncio +async def test_list_merge_combinator(context: StreamFlowContext): + """Test saving ListMergeCombinator on database and re-load it in a new Workflow""" workflow, (port,) = await create_workflow(context, num_port=1) step, new_workflow, new_step = await _base_step_test_process( workflow, - ValueFromTransformer, + CombinatorStep, { - "name": utils.random_name() + "-value-from-transformer", - "processor": CWLTokenProcessor( - name=port.name, + "name": utils.random_name() + "-combinator", + "combinator": ListMergeCombinator( + name=utils.random_name(), workflow=workflow, + input_names=[port.name], + output_name=port.name, + flatten=False, ), - "port_name": port.name, - "full_js": True, - "value_from": f"$(inputs.{port.name} + 1)", }, context, test_are_eq=False, ) + _persistent_id_test(workflow, new_workflow, step, new_step) + _set_to_none(step, id_to_none=True, wf_to_none=True) _set_to_none(new_step, id_to_none=True, wf_to_none=True) - assert ( - step.processor.workflow.persistent_id - != new_step.processor.workflow.persistent_id - ) - _set_to_none(step.processor, wf_to_none=True) - _set_to_none(new_step.processor, wf_to_none=True) + _workflow_in_combinator_test(step.combinator, new_step.combinator) + _set_workflow_in_combinator(step.combinator, None) + _set_workflow_in_combinator(new_step.combinator, None) assert are_equals(step, new_step) @pytest.mark.asyncio -@pytest.mark.parametrize( - "transformer_cls", - [AllNonNullTransformer, FirstNonNullTransformer, OnlyNonNullTransformer], -) -async 
def test_non_null_transformer(context: StreamFlowContext, transformer_cls): - """Test saving All/First/Only NonNullTransformer on database and re-load it in a new Workflow""" +async def test_list_to_element_transformer(context: StreamFlowContext): + """Test saving ListToElementTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] await _base_step_test_process( workflow, - transformer_cls, + ListToElementTransformer, { "name": utils.random_name() + "-transformer", }, @@ -113,26 +137,49 @@ async def test_non_null_transformer(context: StreamFlowContext, transformer_cls) @pytest.mark.asyncio -async def test_forward_transformer(context: StreamFlowContext): - """Test saving ForwardTransformer on database and re-load it in a new Workflow""" - workflow = (await create_workflow(context, num_port=1))[0] - await _base_step_test_process( +async def test_loop_value_from_transformer(context: StreamFlowContext): + """Test saving LoopValueFromTransformer on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) + step, new_workflow, new_step = await _base_step_test_process( workflow, - ForwardTransformer, + LoopValueFromTransformer, { - "name": utils.random_name() + "-transformer", + "name": utils.random_name() + "-loop-value-from-transformer", + "processor": CWLTokenProcessor( + name=port.name, + workflow=workflow, + ), + "port_name": port.name, + "full_js": True, + "value_from": f"$(inputs.{port.name} + 1)", }, context, + test_are_eq=False, ) + _persistent_id_test(workflow, new_workflow, step, new_step) + + _set_to_none(step, id_to_none=True, wf_to_none=True) + _set_to_none(new_step, id_to_none=True, wf_to_none=True) + assert ( + step.processor.workflow.persistent_id + != new_step.processor.workflow.persistent_id + ) + _set_to_none(step.processor, id_to_none=True, wf_to_none=True) + _set_to_none(new_step.processor, id_to_none=True, wf_to_none=True) + assert are_equals(step, new_step) @pytest.mark.asyncio -async def test_list_to_element_transformer(context: StreamFlowContext): - """Test saving ListToElementTransformer on database and re-load it in a new Workflow""" +@pytest.mark.parametrize( + "transformer_cls", + [AllNonNullTransformer, FirstNonNullTransformer, OnlyNonNullTransformer], +) +async def test_non_null_transformer(context: StreamFlowContext, transformer_cls): + """Test saving All/First/Only NonNullTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] await _base_step_test_process( workflow, - ListToElementTransformer, + transformer_cls, { "name": utils.random_name() + "-transformer", }, @@ -141,20 +188,21 @@ async def test_list_to_element_transformer(context: StreamFlowContext): @pytest.mark.asyncio -async def test_cwl_token_transformer(context: StreamFlowContext): - """Test saving CWLTokenTransformer on database and re-load it in a new Workflow""" - workflow = (await create_workflow(context, num_port=1))[0] - step_name = utils.random_name() +async def test_value_from_transformer(context: StreamFlowContext): + """Test saving ValueFromTransformer on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) step, new_workflow, new_step = await _base_step_test_process( workflow, - CWLTokenTransformer, + ValueFromTransformer, { - "name": step_name + "-transformer", - "port_name": "test", + "name": utils.random_name() + "-value-from-transformer", "processor": 
CWLTokenProcessor( - name=step_name, + name=port.name, workflow=workflow, ), + "port_name": port.name, + "full_js": True, + "value_from": f"$(inputs.{port.name} + 1)", }, context, test_are_eq=False, @@ -165,8 +213,8 @@ async def test_cwl_token_transformer(context: StreamFlowContext): step.processor.workflow.persistent_id != new_step.processor.workflow.persistent_id ) - _set_to_none(step.processor, id_to_none=True, wf_to_none=True) - _set_to_none(new_step.processor, id_to_none=True, wf_to_none=True) + _set_to_none(step.processor, wf_to_none=True) + _set_to_none(new_step.processor, wf_to_none=True) assert are_equals(step, new_step) @@ -187,15 +235,15 @@ async def test_cwl_conditional_step(context: StreamFlowContext): @pytest.mark.asyncio -async def test_cwl_transfer_step(context: StreamFlowContext): - """Test saving CWLTransferStep on database and re-load it in a new Workflow""" - workflow, (port,) = await create_workflow(context, num_port=1) +async def test_cwl_empty_scatter_conditional_step(context: StreamFlowContext): + """Test saving CWLEmptyScatterConditionalStep on database and re-load it in a new Workflow""" + workflow = (await create_workflow(context, num_port=1))[0] await _base_step_test_process( workflow, - CWLTransferStep, + CWLEmptyScatterConditionalStep, { - "name": posixpath.join(utils.random_name(), "__transfer__", "test"), - "job_port": port, + "name": utils.random_name() + "-empty-scatter-condition", + "scatter_method": "dotproduct", }, context, ) @@ -233,72 +281,24 @@ async def test_cwl_loop_conditional_step(context: StreamFlowContext): @pytest.mark.asyncio -async def test_empty_scatter_conditional_step(context: StreamFlowContext): - """Test saving CWLEmptyScatterConditionalStep on database and re-load it in a new Workflow""" +async def test_cwl_token_transformer(context: StreamFlowContext): + """Test saving CWLTokenTransformer on database and re-load it in a new Workflow""" workflow = (await create_workflow(context, num_port=1))[0] - await _base_step_test_process( - workflow, - CWLEmptyScatterConditionalStep, - { - "name": utils.random_name() + "-empty-scatter-condition", - "scatter_method": "dotproduct", - }, - context, - ) - - -@pytest.mark.asyncio -async def test_list_merge_combinator(context: StreamFlowContext): - """Test saving ListMergeCombinator on database and re-load it in a new Workflow""" - workflow, (port,) = await create_workflow(context, num_port=1) - step, new_workflow, new_step = await _base_step_test_process( - workflow, - CombinatorStep, - { - "name": utils.random_name() + "-combinator", - "combinator": ListMergeCombinator( - name=utils.random_name(), - workflow=workflow, - input_names=[port.name], - output_name=port.name, - flatten=False, - ), - }, - context, - test_are_eq=False, - ) - _persistent_id_test(workflow, new_workflow, step, new_step) - - _set_to_none(step, id_to_none=True, wf_to_none=True) - _set_to_none(new_step, id_to_none=True, wf_to_none=True) - _workflow_in_combinator_test(step.combinator, new_step.combinator) - _set_workflow_in_combinator(step.combinator, None) - _set_workflow_in_combinator(new_step.combinator, None) - assert are_equals(step, new_step) - - -@pytest.mark.asyncio -async def test_loop_value_from_transformer(context: StreamFlowContext): - """Test saving LoopValueFromTransformer on database and re-load it in a new Workflow""" - workflow, (port,) = await create_workflow(context, num_port=1) + step_name = utils.random_name() step, new_workflow, new_step = await _base_step_test_process( workflow, - LoopValueFromTransformer, + 
CWLTokenTransformer, { - "name": utils.random_name() + "-loop-value-from-transformer", + "name": step_name + "-transformer", + "port_name": "test", "processor": CWLTokenProcessor( - name=port.name, + name=step_name, workflow=workflow, ), - "port_name": port.name, - "full_js": True, - "value_from": f"$(inputs.{port.name} + 1)", }, context, test_are_eq=False, ) - _persistent_id_test(workflow, new_workflow, step, new_step) - _set_to_none(step, id_to_none=True, wf_to_none=True) _set_to_none(new_step, id_to_none=True, wf_to_none=True) assert ( @@ -311,15 +311,15 @@ async def test_loop_value_from_transformer(context: StreamFlowContext): @pytest.mark.asyncio -@pytest.mark.parametrize("step_cls", [CWLLoopOutputAllStep, CWLLoopOutputLastStep]) -async def test_cwl_loop_output(context: StreamFlowContext, step_cls): - """Test saving CWLLoopOutputAllStep on database and re-load it in a new Workflow""" - workflow = (await create_workflow(context, num_port=1))[0] +async def test_cwl_transfer_step(context: StreamFlowContext): + """Test saving CWLTransferStep on database and re-load it in a new Workflow""" + workflow, (port,) = await create_workflow(context, num_port=1) await _base_step_test_process( workflow, - step_cls, + CWLTransferStep, { - "name": utils.random_name() + "-loop-output", + "name": posixpath.join(utils.random_name(), "__transfer__", "test"), + "job_port": port, }, context, ) From 4c7382c883d8e9fa4a03545762366d8b1d2ab671 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Mon, 8 Jan 2024 11:58:20 +0100 Subject: [PATCH 62/69] doc --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 68973259e..a54c427bc 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "b6317351be9e647112776f7c462400c2be7991110b4c46eaa7c49d04774cb22e" + CHECKSUM: "2f5ff293468fec970a5594dad69368839f1a1bab8dfe97019cdfd20a5154df11" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 75d980738..2ac7ff328 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -334,17 +334,20 @@ Furthermore, it is in charge of assign the ``persistent_id`` when an entity is a WorkflowBuilder -^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^ The ``WorkflowBuilder`` class loads the steps and ports of an existing workflow from the database and inserts them into a new workflow object, which is passed as argument to the constructor. Between the workflows, it is possible to have some shared entities, particularly those used only in reading, for example ``deployment``` and ``target``. Instead, the entities with an internal state must be different instances, so ``steps``, ``ports`` and ``workflow``. This is done by loading the entity, keeping the ``persistent_id`` in the case of a shared object, or creating a new ``persistent_id`` otherwise. The ``WorkflowBuilder`` class extends the ``DefaultDatabaseLoadingContext`` class and overwrites only the methods involving the ``step``, ``port``, and ``workflow`` entities. -The class has the ``workflow`` attribute, i.e., the new ``workflow`` instance, and the ``load_workflow`` method returns it. 
+The class has the ``workflow``, i.e., the new ``workflow`` instance, and the ``load_entire_wf`` attributes. +This latter attribute has default value to False, when it is set to True, it will load in the new workflow all the entities involved in the original workflow. Instead, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set the ``persistent_id`` as their parent methods. .. code-block:: python - def __init__(self, workflow: Workflow): + def __init__(self, workflow: Workflow, load_entire_wf: bool = False): super().__init__() + self.load_entire_wf: bool = load_entire_wf self.workflow: Workflow = workflow + From f12696bc24cd6a8480bff966839094e3a41b509c Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Mon, 8 Jan 2024 12:26:07 +0100 Subject: [PATCH 63/69] doc --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 5 ++--- tests/test_build_wf.py | 3 --- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index a54c427bc..fbe5597a0 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "2f5ff293468fec970a5594dad69368839f1a1bab8dfe97019cdfd20a5154df11" + CHECKSUM: "84a7a90b2204660aa16f534827f69a80bddfe013eecbc8aa402638fa02efa7f8" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 2ac7ff328..108ae674b 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -339,10 +339,9 @@ The ``WorkflowBuilder`` class loads the steps and ports of an existing workflow Between the workflows, it is possible to have some shared entities, particularly those used only in reading, for example ``deployment``` and ``target``. Instead, the entities with an internal state must be different instances, so ``steps``, ``ports`` and ``workflow``. This is done by loading the entity, keeping the ``persistent_id`` in the case of a shared object, or creating a new ``persistent_id`` otherwise. The ``WorkflowBuilder`` class extends the ``DefaultDatabaseLoadingContext`` class and overwrites only the methods involving the ``step``, ``port``, and ``workflow`` entities. +Particularly, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set the ``persistent_id`` as their parent methods. The class has the ``workflow``, i.e., the new ``workflow`` instance, and the ``load_entire_wf`` attributes. -This latter attribute has default value to False, when it is set to True, it will load in the new workflow all the entities involved in the original workflow. -Instead, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set the ``persistent_id`` as their parent methods. - +This latter attribute has a default value of False; when it is initialized to True, it will load all the entities of the original workflow in the new workflow. .. 
code-block:: python def __init__(self, workflow: Workflow, load_entire_wf: bool = False): diff --git a/tests/test_build_wf.py b/tests/test_build_wf.py index 9aed6cad2..dc9bb14b3 100644 --- a/tests/test_build_wf.py +++ b/tests/test_build_wf.py @@ -130,9 +130,6 @@ def _workflow_in_combinator_test(original_combinator, new_combinator): _workflow_in_combinator_test(original_inner, new_inner) -# a b c d e f g h i j k l m n o p q r s t u v w x y z - - @pytest.mark.asyncio @pytest.mark.parametrize( "combinator", From 29d2e22d631530a52363f119442d0a137c252371 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 9 Jan 2024 10:40:30 +0100 Subject: [PATCH 64/69] removed load_entire_workflow parameter --- streamflow/persistence/loading_context.py | 17 +++++++---------- tests/test_build_wf.py | 2 +- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 27af50c73..8066de43f 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -83,11 +83,9 @@ async def load_workflow(self, context: StreamFlowContext, persistent_id: int): class WorkflowBuilder(DefaultDatabaseLoadingContext): - def __init__(self, workflow: Workflow, load_entire_wf: bool = False): + def __init__(self, workflow: Workflow): super().__init__() - self.load_entire_wf: bool = load_entire_wf self.workflow: Workflow = workflow - self._wf_loaded = False def add_port(self, persistent_id: int, port: Port): pass @@ -96,28 +94,27 @@ def add_step(self, persistent_id: int, step: Step): pass def add_workflow(self, persistent_id: int, workflow: Workflow): - pass + self._workflows[persistent_id] = self.workflow async def load_step(self, context: StreamFlowContext, persistent_id: int): step_row = await context.database.get_step(persistent_id) - step = self.workflow.steps.get(step_row["name"]) - if step is None: + if (step := self.workflow.steps.get(step_row["name"])) is None: # If the step is not available in the new workflow, a new one must be created + self.add_workflow(step_row['workflow'], self.workflow) step = await Step.load(context, persistent_id, self) self.workflow.steps[step.name] = step return step async def load_port(self, context: StreamFlowContext, persistent_id: int): port_row = await context.database.get_port(persistent_id) - port = self.workflow.ports.get(port_row["name"]) - if port is None: + if (port := self.workflow.ports.get(port_row["name"])) is None: # If the port is not available in the new workflow, a new one must be created + self.add_workflow(port_row['workflow'], self.workflow) port = await Port.load(context, persistent_id, self) self.workflow.ports[port.name] = port return port async def load_workflow(self, context: StreamFlowContext, persistent_id: int): - if self.load_entire_wf and not self._wf_loaded: - self._wf_loaded = True + if persistent_id not in self._workflows.keys(): await Workflow.load(context, persistent_id, self) return self.workflow diff --git a/tests/test_build_wf.py b/tests/test_build_wf.py index dc9bb14b3..53111e3a1 100644 --- a/tests/test_build_wf.py +++ b/tests/test_build_wf.py @@ -360,7 +360,7 @@ async def test_workflow(context: StreamFlowContext): await workflow.save(context) new_workflow = (await create_workflow(context, num_port=0))[0] - loading_context = WorkflowBuilder(new_workflow, load_entire_wf=True) + loading_context = WorkflowBuilder(new_workflow) new_workflow = await loading_context.load_workflow(context, workflow.persistent_id) assert 
new_workflow.name != workflow.name From dbe62e13863bb3a830630509ac03e94bda988d9b Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 9 Jan 2024 10:50:49 +0100 Subject: [PATCH 65/69] doc --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 16 +--------------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index fbe5597a0..126a26e63 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "84a7a90b2204660aa16f534827f69a80bddfe013eecbc8aa402638fa02efa7f8" + CHECKSUM: "58300445718daa0ff656f6b292f73dbc3db89bf381f4d41e230e59d1fd97c163" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 108ae674b..55c3c1a0a 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -260,59 +260,45 @@ The ``DatabaseLoadingContext`` interface allows to define classes that manage th .. code-block:: python - @abstractmethod def add_deployment(self, persistent_id: int, deployment: DeploymentConfig): ... - @abstractmethod def add_filter(self, persistent_id: int, filter_config: FilterConfig): ... - @abstractmethod def add_port(self, persistent_id: int, port: Port): ... - @abstractmethod def add_step(self, persistent_id: int, step: Step): ... - @abstractmethod def add_target(self, persistent_id: int, target: Target): ... - @abstractmethod def add_token(self, persistent_id: int, token: Token): ... - @abstractmethod def add_workflow(self, persistent_id: int, workflow: Workflow): ... - @abstractmethod async def load_deployment(self, context: StreamFlowContext, persistent_id: int): ... - @abstractmethod async def load_filter(self, context: StreamFlowContext, persistent_id: int): ... - @abstractmethod async def load_port(self, context: StreamFlowContext, persistent_id: int): ... - @abstractmethod async def load_step(self, context: StreamFlowContext, persistent_id: int): ... - @abstractmethod async def load_target(self, context: StreamFlowContext, persistent_id: int): ... - @abstractmethod async def load_token(self, context: StreamFlowContext, persistent_id: int): ... - @abstractmethod async def load_workflow(self, context: StreamFlowContext, persistent_id: int): ... @@ -341,7 +327,7 @@ This is done by loading the entity, keeping the ``persistent_id`` in the case of The ``WorkflowBuilder`` class extends the ``DefaultDatabaseLoadingContext`` class and overwrites only the methods involving the ``step``, ``port``, and ``workflow`` entities. Particularly, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set the ``persistent_id`` as their parent methods. The class has the ``workflow``, i.e., the new ``workflow`` instance, and the ``load_entire_wf`` attributes. -This latter attribute has a default value of False; when it is initialized to True, it will load all the entities of the original workflow in the new workflow. +This latter attribute has a default value of False; when it is initialized to True, the ``load_workflow`` method will load all the entities of the original workflow in the new workflow. .. 
code-block:: python def __init__(self, workflow: Workflow, load_entire_wf: bool = False): From 4aed45e551c156d71e9eb340b9332b77a742fab9 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Tue, 9 Jan 2024 14:45:04 +0100 Subject: [PATCH 66/69] removed WorkflowBuilder logic in the step load method --- streamflow/core/workflow.py | 13 ++++++------- streamflow/persistence/loading_context.py | 7 ++++++- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index 2d4464045..c5a4f658b 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -432,6 +432,12 @@ async def load( row = await context.database.get_step(persistent_id) type = cast(Type[Step], utils.get_class_from_name(row["type"])) step = await type._load(context, row, loading_context) + step.status = Status(row["status"]) + step.terminated = step.status in [ + Status.COMPLETED, + Status.FAILED, + Status.SKIPPED, + ] input_deps = await context.database.get_input_ports(persistent_id) loading_context.add_step(persistent_id, step) step.input_ports = await load_dependencies( @@ -447,13 +453,6 @@ async def load( context, loading_context, ) - if step.persistent_id: - step.status = Status(row["status"]) - step.terminated = step.status in [ - Status.COMPLETED, - Status.FAILED, - Status.SKIPPED, - ] return step @abstractmethod diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 8066de43f..8f6cdd67b 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -4,7 +4,7 @@ from streamflow.core.context import StreamFlowContext from streamflow.core.deployment import DeploymentConfig, Target, FilterConfig from streamflow.core.persistence import DatabaseLoadingContext -from streamflow.core.workflow import Port, Step, Token, Workflow +from streamflow.core.workflow import Port, Step, Token, Workflow, Status class DefaultDatabaseLoadingContext(DatabaseLoadingContext): @@ -102,6 +102,11 @@ async def load_step(self, context: StreamFlowContext, persistent_id: int): # If the step is not available in the new workflow, a new one must be created self.add_workflow(step_row['workflow'], self.workflow) step = await Step.load(context, persistent_id, self) + + # restore initial step state + step.status = Status.WAITING + step.terminated = False + self.workflow.steps[step.name] = step return step From 79d5c3ffb93b9171b36ffca69d8bdee65c3112ed Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Thu, 11 Jan 2024 11:11:22 +0100 Subject: [PATCH 67/69] removed load_dependencies --- streamflow/core/utils.py | 35 ----------------- streamflow/core/workflow.py | 25 ++++++------ streamflow/persistence/loading_context.py | 48 +++++++++++++---------- 3 files changed, 41 insertions(+), 67 deletions(-) diff --git a/streamflow/core/utils.py b/streamflow/core/utils.py index e79ea9e70..441d4d0f1 100644 --- a/streamflow/core/utils.py +++ b/streamflow/core/utils.py @@ -15,10 +15,7 @@ MutableSequence, TYPE_CHECKING, ) - -from streamflow.core.context import StreamFlowContext from streamflow.core.exception import WorkflowExecutionException -from streamflow.core.persistence import DatabaseLoadingContext if TYPE_CHECKING: from streamflow.core.deployment import Connector, Location @@ -278,35 +275,3 @@ def random_name() -> str: def wrap_command(command: str): return ["/bin/sh", "-c", f"{command}"] - - -async def load_dependencies( - dependency_rows: MutableSequence[MutableMapping[str, Any]], - load_ports: bool, - 
context: StreamFlowContext, - loading_context: DatabaseLoadingContext, -): - # This method is generally called from the step load method. - # If the steps and ports are loaded into their workflow, it is helpful call loading_context.load_port because - # - if port instance is already in the loading_context, it is used that instance - # - otherwise a new port instance is created, - # and it will be helpful because that instance will be added in the workflow - # If the ports are loaded into a new workflow, it is not helpful call loading_context.load_port because - # - if the instance is not into the loading_context it is created a new one, - # but this instance will not add into the new workflow because it has already the old workflow reference - if load_ports: - ports = await asyncio.gather( - *( - asyncio.create_task(loading_context.load_port(context, d["port"])) - for d in dependency_rows - ) - ) - return {d["name"]: p.name for d, p in zip(dependency_rows, ports)} - else: - port_rows = await asyncio.gather( - *( - asyncio.create_task(context.database.get_port(d["port"])) - for d in dependency_rows - ) - ) - return {d["name"]: p["name"] for d, p in zip(dependency_rows, port_rows)} diff --git a/streamflow/core/workflow.py b/streamflow/core/workflow.py index c5a4f658b..3d97fbbc2 100644 --- a/streamflow/core/workflow.py +++ b/streamflow/core/workflow.py @@ -15,7 +15,6 @@ DependencyType, PersistableEntity, ) -from streamflow.core.utils import load_dependencies if TYPE_CHECKING: from streamflow.core.deployment import Connector, Location, Target @@ -440,19 +439,23 @@ async def load( ] input_deps = await context.database.get_input_ports(persistent_id) loading_context.add_step(persistent_id, step) - step.input_ports = await load_dependencies( - input_deps, - step.persistent_id is None, - context, - loading_context, + input_ports = await asyncio.gather( + *( + asyncio.create_task(loading_context.load_port(context, d["port"])) + for d in input_deps + ) ) + step.input_ports = {d["name"]: p.name for d, p in zip(input_deps, input_ports)} output_deps = await context.database.get_output_ports(persistent_id) - step.output_ports = await load_dependencies( - output_deps, - step.persistent_id is None, - context, - loading_context, + output_ports = await asyncio.gather( + *( + asyncio.create_task(loading_context.load_port(context, d["port"])) + for d in output_deps + ) ) + step.output_ports = { + d["name"]: p.name for d, p in zip(output_deps, output_ports) + } return step @abstractmethod diff --git a/streamflow/persistence/loading_context.py b/streamflow/persistence/loading_context.py index 8f6cdd67b..5f0642e71 100644 --- a/streamflow/persistence/loading_context.py +++ b/streamflow/persistence/loading_context.py @@ -88,36 +88,42 @@ def __init__(self, workflow: Workflow): self.workflow: Workflow = workflow def add_port(self, persistent_id: int, port: Port): - pass + self._ports[persistent_id] = port def add_step(self, persistent_id: int, step: Step): - pass + self._steps[persistent_id] = step def add_workflow(self, persistent_id: int, workflow: Workflow): self._workflows[persistent_id] = self.workflow async def load_step(self, context: StreamFlowContext, persistent_id: int): - step_row = await context.database.get_step(persistent_id) - if (step := self.workflow.steps.get(step_row["name"])) is None: - # If the step is not available in the new workflow, a new one must be created - self.add_workflow(step_row['workflow'], self.workflow) - step = await Step.load(context, persistent_id, self) - - # restore initial 
step state - step.status = Status.WAITING - step.terminated = False - - self.workflow.steps[step.name] = step - return step + if persistent_id in self._steps.keys(): + return self._steps[persistent_id] + else: + step_row = await context.database.get_step(persistent_id) + if (step := self.workflow.steps.get(step_row["name"])) is None: + # If the step is not available in the new workflow, a new one must be created + self.add_workflow(step_row["workflow"], self.workflow) + step = await Step.load(context, persistent_id, self) + + # restore initial step state + step.status = Status.WAITING + step.terminated = False + + self.workflow.steps[step.name] = step + return step async def load_port(self, context: StreamFlowContext, persistent_id: int): - port_row = await context.database.get_port(persistent_id) - if (port := self.workflow.ports.get(port_row["name"])) is None: - # If the port is not available in the new workflow, a new one must be created - self.add_workflow(port_row['workflow'], self.workflow) - port = await Port.load(context, persistent_id, self) - self.workflow.ports[port.name] = port - return port + if persistent_id in self._ports.keys(): + return self._ports[persistent_id] + else: + port_row = await context.database.get_port(persistent_id) + if (port := self.workflow.ports.get(port_row["name"])) is None: + # If the port is not available in the new workflow, a new one must be created + self.add_workflow(port_row["workflow"], self.workflow) + port = await Port.load(context, persistent_id, self) + self.workflow.ports[port.name] = port + return port async def load_workflow(self, context: StreamFlowContext, persistent_id: int): if persistent_id not in self._workflows.keys(): From b1ac86615971922bb4c7b7ac3ba182f5d578f97c Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Thu, 11 Jan 2024 11:22:00 +0100 Subject: [PATCH 68/69] doc --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 126a26e63..32aac5f28 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "58300445718daa0ff656f6b292f73dbc3db89bf381f4d41e230e59d1fd97c163" + CHECKSUM: "aa9814377515faabfe9ca362b24799f1733e8e95b962a726b12d3aeb5fe0d2ea" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index 55c3c1a0a..f44e1bc09 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -23,7 +23,7 @@ StreamFlow relies on a persistent ``Database`` to store all the metadata regardi ) -> None: ... -Each ``PersistableEntity`` is identified by a unique numerical ``persistent_id`` related to the corresponding ``Database`` record. Two methods, ``save`` and ``load``, allow persisting the entity in the ``Database`` and retrieving it from the persistent record. Note that ``load`` is a class method, as it must construct a new instance, furthermore it does not assign the ``persistent_id``. +Each ``PersistableEntity`` is identified by a unique numerical ``persistent_id`` related to the corresponding ``Database`` record. Two methods, ``save`` and ``load``, allow persisting the entity in the ``Database`` and retrieving it from the persistent record. 
Note that ``load`` is a class method, as it must construct a new instance. Furthermore, the ``load`` method does not assign the ``persistent_id``. The ``load`` method receives three input parameters: the current execution ``context``, the ``persistent_id`` of the instance that should be loaded, and a ``loading_context`` (see :ref:`DatabaseLoadingContext `). @@ -316,23 +316,26 @@ Name Class DefaultDatabaseLoadingContext ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``DefaultDatabaseLoadingContext`` keeps track of all the objects already loaded in the current transaction, serving as a cache to efficiently load nested entities and prevent deadlocks when dealing with circular references. -Furthermore, it is in charge of assign the ``persistent_id`` when an entity is added to the cache with the ``add_*`` methods. +Furthermore, it assigns the ``persistent_id`` when an entity is added to the cache with the ``add_entity`` method. WorkflowBuilder ^^^^^^^^^^^^^^^ -The ``WorkflowBuilder`` class loads the steps and ports of an existing workflow from the database and inserts them into a new workflow object, which is passed as argument to the constructor. +The ``WorkflowBuilder`` class loads the steps and ports of an existing workflow from the database and inserts them into a new workflow object, passed as an argument to the constructor. Between the workflows, it is possible to have some shared entities, particularly those used only in reading, for example ``deployment``` and ``target``. Instead, the entities with an internal state must be different instances, so ``steps``, ``ports`` and ``workflow``. This is done by loading the entity, keeping the ``persistent_id`` in the case of a shared object, or creating a new ``persistent_id`` otherwise. -The ``WorkflowBuilder`` class extends the ``DefaultDatabaseLoadingContext`` class and overwrites only the methods involving the ``step``, ``port``, and ``workflow`` entities. +The ``WorkflowBuilder`` class extends the ``DefaultDatabaseLoadingContext`` class; it has the ``workflow``, i.e., the new ``workflow`` instance, moreover, overwrites only the methods involving the ``step``, ``port``, and ``workflow`` entities. Particularly, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set the ``persistent_id`` as their parent methods. -The class has the ``workflow``, i.e., the new ``workflow`` instance, and the ``load_entire_wf`` attributes. -This latter attribute has a default value of False; when it is initialized to True, the ``load_workflow`` method will load all the entities of the original workflow in the new workflow. +The class has the ``workflow``, i.e., the new ``workflow`` instance. +The ``load_workflow`` method has two behaviors based on the calling order. +When a ``WorkflowBuilder`` instance is created, and the ``load_workflow`` method is called as the first method, it loads all the entities of the original workflow in the new one. +Instead, if it is called after the ``load_step`` or ``load_port`` methods, it returns the ``self.workflow`` without loading other entities. +This allows to copy the entire workflow or load only a subset. + + .. 
code-block:: python - def __init__(self, workflow: Workflow, load_entire_wf: bool = False): + def __init__(self, workflow: Workflow): super().__init__() - self.load_entire_wf: bool = load_entire_wf self.workflow: Workflow = workflow - From 2fde06b9df4f9c18d27c9ade89f859b1c4bd5619 Mon Sep 17 00:00:00 2001 From: Alberto Mulone Date: Sat, 13 Jan 2024 13:32:46 +0100 Subject: [PATCH 69/69] doc --- .github/workflows/ci-tests.yaml | 2 +- docs/source/ext/database.rst | 29 ++++++++--------------------- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 32aac5f28..566806d58 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -147,7 +147,7 @@ jobs: python -m pip install -r docs/requirements.txt - name: "Build documentation and check for consistency" env: - CHECKSUM: "aa9814377515faabfe9ca362b24799f1733e8e95b962a726b12d3aeb5fe0d2ea" + CHECKSUM: "fc9bdd01ef90f0b24d019da7683aa528af10119ef54d0a13cb16ec7adaa04242" run: | cd docs/ HASH="$(make checksum | tail -n1)" diff --git a/docs/source/ext/database.rst b/docs/source/ext/database.rst index f44e1bc09..1a7a930a9 100644 --- a/docs/source/ext/database.rst +++ b/docs/source/ext/database.rst @@ -23,9 +23,9 @@ StreamFlow relies on a persistent ``Database`` to store all the metadata regardi ) -> None: ... -Each ``PersistableEntity`` is identified by a unique numerical ``persistent_id`` related to the corresponding ``Database`` record. Two methods, ``save`` and ``load``, allow persisting the entity in the ``Database`` and retrieving it from the persistent record. Note that ``load`` is a class method, as it must construct a new instance. Furthermore, the ``load`` method does not assign the ``persistent_id``. +Each ``PersistableEntity`` is identified by a unique numerical ``persistent_id`` related to the corresponding ``Database`` record. Two methods, ``save`` and ``load``, allow persisting the entity in the ``Database`` and retrieving it from the persistent record. Note that ``load`` is a class method, as it must construct a new instance. -The ``load`` method receives three input parameters: the current execution ``context``, the ``persistent_id`` of the instance that should be loaded, and a ``loading_context`` (see :ref:`DatabaseLoadingContext `). +The ``load`` method receives three input parameters: the current execution ``context``, the ``persistent_id`` of the instance that should be loaded, and a ``loading_context`` (see :ref:`DatabaseLoadingContext `). Note that the ``load`` method should not directly assign the ``persistent_id`` to the new entity, as this operation is in charge to the :ref:`DatabaseLoadingContext ` class. Persistence =========== @@ -255,8 +255,8 @@ The database schema is structured as follows: DatabaseLoadingContext ====================== -Workflow loading can be costly in terms of time and memory but also tricky, with the possibility of deadlock. -The ``DatabaseLoadingContext`` interface allows to define classes that manage these problems. Good practice is to load the objects from these classes instead of using directly the entity ``load`` methods. +Workflow loading is a delicate operation. If not managed properly, it can be costly in terms of time and memory and lead to deadlocks in case of circular references. +The ``DatabaseLoadingContext`` interface allows to define classes in charge of managing these aspects. 
Users should always rely on these classes to load entities, instead of directly calling ``load`` methods from ``PersistableEntity`` instances. .. code-block:: python @@ -316,26 +316,13 @@ Name Class DefaultDatabaseLoadingContext ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``DefaultDatabaseLoadingContext`` keeps track of all the objects already loaded in the current transaction, serving as a cache to efficiently load nested entities and prevent deadlocks when dealing with circular references. -Furthermore, it assigns the ``persistent_id`` when an entity is added to the cache with the ``add_entity`` method. - +Furthermore, it is in charge of assigning the ``persistent_id`` when an entity is added to the cache through an ``add_*`` method. WorkflowBuilder ^^^^^^^^^^^^^^^ -The ``WorkflowBuilder`` class loads the steps and ports of an existing workflow from the database and inserts them into a new workflow object, passed as an argument to the constructor. -Between the workflows, it is possible to have some shared entities, particularly those used only in reading, for example ``deployment``` and ``target``. Instead, the entities with an internal state must be different instances, so ``steps``, ``ports`` and ``workflow``. -This is done by loading the entity, keeping the ``persistent_id`` in the case of a shared object, or creating a new ``persistent_id`` otherwise. -The ``WorkflowBuilder`` class extends the ``DefaultDatabaseLoadingContext`` class; it has the ``workflow``, i.e., the new ``workflow`` instance, moreover, overwrites only the methods involving the ``step``, ``port``, and ``workflow`` entities. -Particularly, the ``add_step``, ``add_port`` and ``add_workflow`` methods do not set the ``persistent_id`` as their parent methods. -The class has the ``workflow``, i.e., the new ``workflow`` instance. -The ``load_workflow`` method has two behaviors based on the calling order. -When a ``WorkflowBuilder`` instance is created, and the ``load_workflow`` method is called as the first method, it loads all the entities of the original workflow in the new one. -Instead, if it is called after the ``load_step`` or ``load_port`` methods, it returns the ``self.workflow`` without loading other entities. -This allows to copy the entire workflow or load only a subset. - +The ``WorkflowBuilder`` class loads the steps and ports of an existing workflow from a ``Database`` and inserts them into a new workflow object received as a constructor argument. It extends the ``DefaultDatabaseLoadingContext`` class and overrides only the methods involving ``step``, ``port``, and ``workflow`` entities. In particular, the ``add_*`` methods of these entities must not set the ``persistent_id``, as they are dealing with a newly-created workflow, and the ``load_*`` methods should reset the internal state of their entities to the initial value (e.g., reset the status to `Status.WAITING` and clear the `terminated` flag). -.. code-block:: python +The ``load_workflow`` method must behave in two different ways, depending on whether it is called directly from a user or in the internal logic of another entity's ``load`` method. In the first case, it should load all the entities related to the original workflow, identified by the ``persistent_id`` argument, into the new one. In the latter case it should simply return the new workflow entity being built. 
- def __init__(self, workflow: Workflow): - super().__init__() - self.workflow: Workflow = workflow +Other entities, such as ``deployment`` and ``target`` objects, can be safely shared between the old and the new workflows, as their internal state does not need to be modified. Therefore, they can be loaded following the common path implemented in the ``DefaultDatabaseLoadingContext`` class.
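For illustration, a minimal usage sketch of the ``WorkflowBuilder`` behaviour described above, mirroring the ``test_workflow`` case in ``tests/test_build_wf.py``. Here ``context`` is assumed to be a ``StreamFlowContext``, ``workflow`` an already-saved workflow, ``step`` one of its persisted steps, and ``new_workflow``/``other_workflow`` freshly created empty workflows; the calls run inside a coroutine.

.. code-block:: python

    from streamflow.persistence.loading_context import WorkflowBuilder

    # (a) Clone an entire persisted workflow into a new, empty one:
    # calling load_workflow() first walks the original workflow and
    # populates the instance passed to the constructor, which is returned.
    builder = WorkflowBuilder(new_workflow)
    copied = await builder.load_workflow(context, workflow.persistent_id)
    assert copied is new_workflow

    # (b) Copy only a single step (and the ports it references): when
    # load_step() is used directly, the remaining entities of the original
    # workflow are not pulled into the target workflow.
    step_builder = WorkflowBuilder(other_workflow)
    step_copy = await step_builder.load_step(context, step.persistent_id)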