Skip to content

Commit

Permalink
Merge pull request #775 from haddocking/mixed_running
Browse files Browse the repository at this point in the history
uniform running modes
  • Loading branch information
mgiulini authored Jan 15, 2024
2 parents 9c0d347 + 0756db9 commit 5ded133
Show file tree
Hide file tree
Showing 36 changed files with 118 additions and 57 deletions.
2 changes: 1 addition & 1 deletion docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ corresponding to different types of complexes, scenarios and data.
Each directory contains both:

- __test__ runs config files (to test the various workflows, make sure the installation works and any changes to the code/scripts have not broken the machinery). Those are set up to run locally.
- __full__ runs config files with recommended parameter settings. Those runs are set up to be executed in HPC mode using slurm (the `...full.cfg`) files. Examples making use of MPI are also provided in some cases, together with an associated job file that should be submitted to the slurm batch system (`...full-mpi.cfg` and `...full-mpi.job`). Make sure to adapt the full config files to your own system.
- __full__ runs config files with recommended parameter settings. Those runs are set up to be executed in "batch" mode using slurm (the `...full.cfg`) files. Examples making use of MPI are also provided in some cases, together with an associated job file that should be submitted to the slurm batch system (`...full-mpi.cfg` and `...full-mpi.job`). Make sure to adapt the full config files to your own system.

The following examples are currently provided:

Expand Down
2 changes: 1 addition & 1 deletion docs/intro.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ You can find examples of HADDOCK3 workflows for the different biological
systems in the `examples` [subfolder][examples] of the HADDOCK3 folder.

HADDOCK3 will start running (different execution modes are available using
either local resources (threads) or in HPC mode using a batch queuing system) -
either local resources (threads) or in "batch" mode using a batch queuing system) -
for details, see [here][queue]. See examples in the `examples` [subfolder][examples]
for configuration files ending in `-full.cfg`.

Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/user_config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ described inside a step will affect only that step. For example:
]
# each .job will produce 5 (or less) models
mode = "hpc"
mode = "batch"
concat = 5
[topoaa]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-CDR-NMR-CSP-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-CDR-acc-cltsel-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-CDR-acc-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-ranairCDR-cltsel-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-ranairCDR-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
run_dir = "run1-multiple-tbls-cltsel-full"

# compute mode
mode = "hpc"
mode = "batch"
# concatenate models inside each job, concat = 5 each .job will produce 5 models
concat = 5
# Limit the number of concurrent submissions to the queue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-cltsel-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
2 changes: 1 addition & 1 deletion examples/docking-protein-DNA/docking-protein-DNA-full.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-mdref-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ run_dir = "run1-flexref-full"
#mode = "local"
#ncores = 40

mode = "hpc"
mode = "batch"
# concatenate models inside each job
concat = 5
# Limit the number of concurrent submissions to the queue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ run_dir = "run1-full"
#ncores = 40

# BATCH/HPC EXECUTION
mode = "hpc"
mode = "batch"
# concatenate models inside each job
concat = 5
# Limit the number of concurrent submissions to the queue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-cltsel-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-mdref-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
run_dir = "run1-cltsel-full"

# execution mode
mode = "hpc"
mode = "batch"
# concatenate models inside each job, concat = 5 each .job will produce 5 models
concat = 5
# Limit the number of concurrent submissions to the queue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-full"

# execution mode
mode = "hpc"
mode = "batch"
# it will take the system's default
# queue = "short"
# concatenate models inside each job, concat = 5 each .job will produce 5 models
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-hpc-test"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
run_dir = "run1-mdref-full"

# execution mode
mode = "hpc"
mode = "batch"
# in which queue the jobs should run, if nothing is defined
# it will take the system's default
# queue = "short"
Expand Down
4 changes: 3 additions & 1 deletion src/haddock/clis/cli_mpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,9 @@ def main(pickled_tasks: FilePath) -> None:
results: list[FilePath] = []
for job in jobs:
job.run()
results.append(job.input_file)
# check if the job has an input file
if hasattr(job, "input_file"):
results.append(job.input_file)

# COMM.Barrier()
# results = MPI.COMM_WORLD.gather(results, root=0)
Expand Down
6 changes: 3 additions & 3 deletions src/haddock/modules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def _fill_emptypaths(self) -> None:
self._params[param] = EmptyPath()


EngineMode = Literal["hpc", "local", "mpi"]
EngineMode = Literal["batch", "local", "mpi"]


def get_engine(
Expand All @@ -374,7 +374,7 @@ def get_engine(
"""
# a bit of a factory pattern here
# this might end up in another module but for now its fine here
if mode == "hpc":
if mode == "batch":
return partial( # type: ignore
HPCScheduler,
target_queue=params["queue"],
Expand All @@ -392,7 +392,7 @@ def get_engine(
return partial(MPIScheduler, ncores=params["ncores"]) # type: ignore

else:
available_engines = ("hpc", "local", "mpi")
available_engines = ("batch", "local", "mpi")
raise ValueError(
f"Scheduler `mode` {mode!r} not recognized. "
f"Available options are {', '.join(available_engines)}"
Expand Down
21 changes: 21 additions & 0 deletions src/haddock/modules/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,24 @@ def confirm_resdic_chainid_length(params: Iterable[str]) -> None:
"more than one character in the chain "
"identifier. Chain IDs should have only one character."
)

def get_analysis_exec_mode(mode: str) -> str:
    """
    Get the execution mode for analysis modules.

    Analysis modules are lightweight compared to docking/refinement
    steps, so submitting them to a batch queuing system is wasteful;
    any "batch" mode is therefore downgraded to "local".

    Parameters
    ----------
    mode : str
        The global execution mode of the run.

    Returns
    -------
    str
        The execution mode to use for the analysis modules.
        If it's "batch", it will be changed to "local"; any other
        mode (e.g. "local", "mpi") is returned unchanged.
    """
    return "local" if mode == "batch" else mode
11 changes: 8 additions & 3 deletions src/haddock/modules/analysis/alascan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
from pathlib import Path

from haddock import log
from haddock.libs.libparallel import Scheduler, get_index_list
from haddock.libs.libparallel import get_index_list
from haddock.libs.libutil import parse_ncores
from haddock.modules import BaseHaddockModule
from haddock.modules import get_engine
from haddock.modules.analysis import get_analysis_exec_mode
from haddock.modules.analysis.alascan.scan import (
Scan,
ScanJob,
Expand Down Expand Up @@ -67,8 +69,11 @@ def _run(self):
)
alascan_jobs.append(job)

scan_engine = Scheduler(alascan_jobs, ncores=ncores)
scan_engine.run()
exec_mode = get_analysis_exec_mode(self.params["mode"])

Engine = get_engine(exec_mode, self.params)
engine = Engine(alascan_jobs)
engine.run()

# cluster-based analysis
clt_alascan = alascan_cluster_analysis(models)
Expand Down
21 changes: 12 additions & 9 deletions src/haddock/modules/analysis/caprieval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
from pathlib import Path

from haddock.core.typing import Any, FilePath
from haddock.libs.libparallel import Scheduler
from haddock.modules import BaseHaddockModule
from haddock.modules import get_engine
from haddock.modules.analysis import get_analysis_exec_mode
from haddock.modules.analysis.caprieval.capri import (
CAPRI,
capri_cluster_analysis,
Expand Down Expand Up @@ -60,9 +61,9 @@ def _run(self) -> None:
# but by assigning each model to an individual job
# we can handle scenarios in which the models are heterogeneous
# for example during CAPRI scoring
capri_jobs: list[CAPRI] = []
jobs: list[CAPRI] = []
for i, model_to_be_evaluated in enumerate(models, start=1):
capri_jobs.append(
jobs.append(
CAPRI(
identificator=str(i),
model=model_to_be_evaluated,
Expand All @@ -71,27 +72,29 @@ def _run(self) -> None:
params=self.params
)
)

exec_mode = get_analysis_exec_mode(self.params["mode"])

ncores = self.params['ncores']
capri_engine = Scheduler(capri_jobs, ncores=ncores)
capri_engine.run()
Engine = get_engine(exec_mode, self.params)
engine = Engine(jobs)
engine.run()

# very ugly way of loading the capri metrics back into
# the CAPRI object, there's definitively a better way
# of doing this
capri_jobs = merge_data(capri_jobs)
jobs = merge_data(jobs)

# Each job created one .tsv, unify them:
rearrange_ss_capri_output(
output_name="capri_ss.tsv",
output_count=len(capri_jobs),
output_count=len(jobs),
sort_key=self.params["sortby"],
sort_ascending=self.params["sort_ascending"],
path=Path(".")
)

capri_cluster_analysis(
capri_list=capri_jobs,
capri_list=jobs,
model_list=models,
output_fname="capri_clt.tsv",
clt_threshold=self.params["clt_threshold"],
Expand Down
1 change: 1 addition & 0 deletions src/haddock/modules/analysis/caprieval/capri.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from itertools import combinations
from pathlib import Path

os.environ['OPENBLAS_NUM_THREADS'] = '1'
import numpy as np
from pdbtools import pdb_segxchain
from scipy.spatial.distance import cdist
Expand Down
Loading

0 comments on commit 5ded133

Please sign in to comment.