Skip to content

Commit

Permalink
Merge pull request #775 from haddocking/mixed_running
Browse files Browse the repository at this point in the history
uniform running modes
  • Loading branch information
mgiulini authored Jan 15, 2024
2 parents 9c0d347 + 0756db9 commit 5ded133
Show file tree
Hide file tree
Showing 36 changed files with 118 additions and 57 deletions.
2 changes: 1 addition & 1 deletion docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ corresponding to different types of complexes, scenarios and data.
Each directory contains both:

- __test__ runs config files (to test the various workflows, make sure the installation works and any changes to the code/scripts have not broken the machinery). Those are set up to run locally.
- __full__ runs config files with recommended parameter settings. Those runs are set up to be executed in HPC mode using slurm (the `...full.cfg`) files. Examples making use of MPI are also provided in some cases, together with an associated job file that should be submitted to the slurm batch system (`...full-mpi.cfg` and `...full-mpi.job`). Make sure to adapt the full config files to your own system.
- __full__ runs config files with recommended parameter settings. Those runs are set up to be executed in "batch" mode using slurm (the `...full.cfg`) files. Examples making use of MPI are also provided in some cases, together with an associated job file that should be submitted to the slurm batch system (`...full-mpi.cfg` and `...full-mpi.job`). Make sure to adapt the full config files to your own system.

The following examples are currently provided:

Expand Down
2 changes: 1 addition & 1 deletion docs/intro.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ You can find examples of HADDOCK3 workflows for the different biological
systems in the `examples` [subfolder][examples] of the HADDOCK3 folder.

HADDOCK3 will start running (different execution modes are available using
either local resources (threads) or in HPC mode using a batch queuing system) -
either local resources (threads) or in "batch" mode using a batch queuing system) -
for details, see [here][queue]. See examples in the `examples` [subfolder][examples]
for configuration files ending in `-full.cfg`.

Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/user_config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ described inside a step will affect only that step. For example:
]
# each .job will produce 5 (or less) models
mode = "hpc"
mode = "batch"
concat = 5
[topoaa]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-CDR-NMR-CSP-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-CDR-acc-cltsel-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-CDR-acc-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-ranairCDR-cltsel-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-ranairCDR-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
run_dir = "run1-multiple-tbls-cltsel-full"

# compute mode
mode = "hpc"
mode = "batch"
# concatenate models inside each job, concat = 5 each .job will produce 5 models
concat = 5
# Limit the number of concurrent submissions to the queue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-cltsel-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
2 changes: 1 addition & 1 deletion examples/docking-protein-DNA/docking-protein-DNA-full.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-mdref-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ run_dir = "run1-flexref-full"
#mode = "local"
#ncores = 40

mode = "hpc"
mode = "batch"
# concatenate models inside each job
concat = 5
# Limit the number of concurrent submissions to the queue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ run_dir = "run1-full"
#ncores = 40

# BATCH/HPC EXECUTION
mode = "hpc"
mode = "batch"
# concatenate models inside each job
concat = 5
# Limit the number of concurrent submissions to the queue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-cltsel-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-mdref-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
run_dir = "run1-cltsel-full"

# execution mode
mode = "hpc"
mode = "batch"
# concatenate models inside each job, concat = 5 each .job will produce 5 models
concat = 5
# Limit the number of concurrent submissions to the queue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# it will take the system's default
# queue = "short"
# concatenate models inside each job, concat = 5 each .job will produce 5 models
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-hpc-test"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-mdref-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
4 changes: 3 additions & 1 deletion src/haddock/clis/cli_mpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,9 @@ def main(pickled_tasks: FilePath) -> None:
results: list[FilePath] = []
for job in jobs:
job.run()
results.append(job.input_file)
# check if the job has an input file
if hasattr(job, "input_file"):
results.append(job.input_file)

# COMM.Barrier()
# results = MPI.COMM_WORLD.gather(results, root=0)
Expand Down
6 changes: 3 additions & 3 deletions src/haddock/modules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def _fill_emptypaths(self) -> None:
self._params[param] = EmptyPath()


EngineMode = Literal["hpc", "local", "mpi"]
EngineMode = Literal["batch", "local", "mpi"]


def get_engine(
Expand All @@ -374,7 +374,7 @@ def get_engine(
"""
# a bit of a factory pattern here
# this might end up in another module but for now its fine here
if mode == "hpc":
if mode == "batch":
return partial( # type: ignore
HPCScheduler,
target_queue=params["queue"],
Expand All @@ -392,7 +392,7 @@ def get_engine(
return partial(MPIScheduler, ncores=params["ncores"]) # type: ignore

else:
available_engines = ("hpc", "local", "mpi")
available_engines = ("batch", "local", "mpi")
raise ValueError(
f"Scheduler `mode` {mode!r} not recognized. "
f"Available options are {', '.join(available_engines)}"
Expand Down
21 changes: 21 additions & 0 deletions src/haddock/modules/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,24 @@ def confirm_resdic_chainid_length(params: Iterable[str]) -> None:
"more than one character in the chain "
"identifier. Chain IDs should have only one character."
)

def get_analysis_exec_mode(mode: str) -> str:
    """
    Get the execution mode for analysis modules.

    Analysis modules are lightweight compared to docking/refinement
    steps, so submitting them to a batch queuing system is wasteful;
    any "batch" mode is therefore downgraded to "local".

    Parameters
    ----------
    mode : str
        The global execution mode of the run.

    Returns
    -------
    str
        The execution mode to use for the analysis modules.
        If it's "batch", it will be changed to "local"; any other
        mode (e.g. "local", "mpi") is returned unchanged.
    """
    return "local" if mode == "batch" else mode
11 changes: 8 additions & 3 deletions src/haddock/modules/analysis/alascan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
from pathlib import Path

from haddock import log
from haddock.libs.libparallel import Scheduler, get_index_list
from haddock.libs.libparallel import get_index_list
from haddock.libs.libutil import parse_ncores
from haddock.modules import BaseHaddockModule
from haddock.modules import get_engine
from haddock.modules.analysis import get_analysis_exec_mode
from haddock.modules.analysis.alascan.scan import (
Scan,
ScanJob,
Expand Down Expand Up @@ -67,8 +69,11 @@ def _run(self):
)
alascan_jobs.append(job)

scan_engine = Scheduler(alascan_jobs, ncores=ncores)
scan_engine.run()
exec_mode = get_analysis_exec_mode(self.params["mode"])

Engine = get_engine(exec_mode, self.params)
engine = Engine(alascan_jobs)
engine.run()

# cluster-based analysis
clt_alascan = alascan_cluster_analysis(models)
Expand Down
21 changes: 12 additions & 9 deletions src/haddock/modules/analysis/caprieval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
from pathlib import Path

from haddock.core.typing import Any, FilePath
from haddock.libs.libparallel import Scheduler
from haddock.modules import BaseHaddockModule
from haddock.modules import get_engine
from haddock.modules.analysis import get_analysis_exec_mode
from haddock.modules.analysis.caprieval.capri import (
CAPRI,
capri_cluster_analysis,
Expand Down Expand Up @@ -60,9 +61,9 @@ def _run(self) -> None:
# but by assigning each model to an individual job
# we can handle scenarios in which the models are heterogeneous
# for example during CAPRI scoring
capri_jobs: list[CAPRI] = []
jobs: list[CAPRI] = []
for i, model_to_be_evaluated in enumerate(models, start=1):
capri_jobs.append(
jobs.append(
CAPRI(
identificator=str(i),
model=model_to_be_evaluated,
Expand All @@ -71,27 +72,29 @@ def _run(self) -> None:
params=self.params
)
)

exec_mode = get_analysis_exec_mode(self.params["mode"])

ncores = self.params['ncores']
capri_engine = Scheduler(capri_jobs, ncores=ncores)
capri_engine.run()
Engine = get_engine(exec_mode, self.params)
engine = Engine(jobs)
engine.run()

# very ugly way of loading the capri metrics back into
# the CAPRI object, there's definitively a better way
# of doing this
capri_jobs = merge_data(capri_jobs)
jobs = merge_data(jobs)

# Each job created one .tsv, unify them:
rearrange_ss_capri_output(
output_name="capri_ss.tsv",
output_count=len(capri_jobs),
output_count=len(jobs),
sort_key=self.params["sortby"],
sort_ascending=self.params["sort_ascending"],
path=Path(".")
)

capri_cluster_analysis(
capri_list=capri_jobs,
capri_list=jobs,
model_list=models,
output_fname="capri_clt.tsv",
clt_threshold=self.params["clt_threshold"],
Expand Down
1 change: 1 addition & 0 deletions src/haddock/modules/analysis/caprieval/capri.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from itertools import combinations
from pathlib import Path

os.environ['OPENBLAS_NUM_THREADS'] = '1'
import numpy as np
from pdbtools import pdb_segxchain
from scipy.spatial.distance import cdist
Expand Down
Loading

0 comments on commit 5ded133

Please sign in to comment.