Skip to content

Commit

Permalink
Merge pull request #794 from haddocking/zip_structures
Browse files Browse the repository at this point in the history
Zip structures
  • Loading branch information
mgiulini authored Feb 5, 2024
2 parents 30685df + 50aee88 commit debcc24
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 0 deletions.
63 changes: 63 additions & 0 deletions src/haddock/clis/cli_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@
ParamMap,
)
from haddock.gear.yaml2cfg import read_from_yaml_config
from haddock.gear.clean_steps import _unpack_gz
from haddock.libs.libcli import _ParamsToDict
from haddock.libs.libio import archive_files_ext
from haddock.libs.libontology import ModuleIO
from haddock.libs.libplots import (
ClRank,
Expand Down Expand Up @@ -299,6 +301,65 @@ def update_paths_in_capri_dict(
return new_capri_dict


def zip_top_ranked(capri_filename: FilePath, cluster_ranking: ClRank, summary_name: FilePath) -> None:
    """
    Collect the top ranked structures and pack them into a single archive.

    For every cluster of the capri table that is listed in `cluster_ranking`,
    the best models are copied (or unpacked from their ``.gz`` counterpart)
    into the current working directory under a rank-based file name. The
    ``.pdb`` files of the current working directory are then archived and
    deleted, and the resulting archive is moved to `summary_name`.

    Parameters
    ----------
    capri_filename : str or Path
        capri ss filename
    cluster_ranking : dict
        {cluster_id : cluster_rank} dictionary
    summary_name : str or Path
        path the archive is moved to

    Returns
    -------
    None
        Nothing is returned; a warning is logged when a structure is missing
        or when the archive could not be found after archiving.
    """
    capri_table = read_capri_table(capri_filename, comment="#")
    for cluster_id, cluster_models in capri_table.groupby("cluster-id"):
        if cluster_id not in cluster_ranking:
            continue
        # "-" presumably marks models that belong to no cluster; they are
        # selected on their global rank instead of their in-cluster rank
        is_clustered = cluster_id != "-"
        if is_clustered:
            rank_column, max_rank = "model-cluster-ranking", 4
        else:
            rank_column, max_rank = "caprieval_rank", 10
        top_models = cluster_models.loc[cluster_models[rank_column] <= max_rank][["model", rank_column]]
        top_models.columns = ["model", "rank"]

        for _, entry in top_models.iterrows():
            model_path = Path(entry["model"])
            packed_path = Path(f"{model_path}.gz")
            model_rank = entry["rank"]
            # name under which the structure is stored in the archive
            if is_clustered:
                destination = f"cluster_{cluster_ranking[cluster_id]}_model_{model_rank}.pdb"
            else:
                destination = f"model_{model_rank}.pdb"

            # plain structure available: a simple copy is enough
            if model_path.exists():
                shutil.copy(model_path, Path(destination))
                continue
            # neither the plain nor the gzipped structure can be found
            if not packed_path.exists():
                log.warning(f"structure {model_path} not found")
                continue
            # only the gzipped structure exists: copy it here, unpack it and
            # rename the unpacked file
            shutil.copy(packed_path, ".")
            _unpack_gz(Path(".", packed_path.name))
            shutil.move(model_path.name, Path(destination))

    # archive the pdb files of the current directory, then remove them
    archive_files_ext(".", "pdb")
    for pdb_file in Path(".").glob("*.pdb"):
        pdb_file.unlink()

    # the archiving step is expected to leave a "pdb.tgz" file behind
    expected_archive = Path(".", "pdb.tgz")
    if not expected_archive.exists():
        log.warning(f"Summary archive {summary_name} not created!")
        return
    shutil.move("pdb.tgz", summary_name)
    log.info(f"Summary archive {summary_name} created!")


def analyse_step(
step: str,
run_dir: FilePath,
Expand Down Expand Up @@ -372,6 +433,8 @@ def analyse_step(
boxes = box_plot_handler(ss_file, cluster_ranking, format, scale)
tables = clt_table_handler(clt_file, ss_file, is_cleaned)
report_generator(boxes, scatters, tables, step)
# provide a zipped archive of the top ranked structures
zip_top_ranked(ss_file, cluster_ranking, Path("summary.tgz"))


def main(
Expand Down
23 changes: 23 additions & 0 deletions tests/test_cli_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
import os
import shutil
from pathlib import Path
import tempfile

import pytest

from haddock.clis.cli_analyse import (
get_cluster_ranking,
main,
update_capri_dict,
zip_top_ranked,
)
from haddock.gear.yaml2cfg import read_from_yaml_config
from haddock.modules.analysis.caprieval import \
Expand Down Expand Up @@ -91,3 +93,24 @@ def test_main(example_capri_ss, example_capri_clt):
assert len(html_files) > 0

shutil.rmtree(run_dir)


def test_zip_top_ranked(example_capri_ss):
    """Test cli_analyse zip_top_ranked function.

    A fake run directory holding one rigidbody structure is built inside a
    temporary directory, ``zip_top_ranked`` is called from the analysis
    folder, and the presence of the summary archive is checked.
    """
    cwd = os.getcwd()
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            os.chdir(tmpdir)
            # build fake run_dir
            rigid_dir = "1_rigidbody"
            rigid_dir_analysis = "1_rigidbody_analysis"
            os.mkdir(rigid_dir)
            os.mkdir(rigid_dir_analysis)
            # fill rigidbody directory with one file
            shutil.copy(
                Path(golden_data, "protprot_complex_1.pdb"),
                Path(rigid_dir, "rigidbody_383.pdb"),
            )
            os.chdir(rigid_dir_analysis)

            exp_cl_ranking = {1: 2}
            zip_top_ranked(example_capri_ss, exp_cl_ranking, "summary.tgz")
            assert os.path.isfile("summary.tgz")
        finally:
            # always leave the temporary directory before it is removed, even
            # when the test fails, so that the following tests do not run
            # from a deleted working directory
            os.chdir(cwd)

0 comments on commit debcc24

Please sign in to comment.