From 0833d3b101062bfb55f19411ea0d0a75a60f2f64 Mon Sep 17 00:00:00 2001 From: shiraz-shah <59959428+shiraz-shah@users.noreply.github.com> Date: Tue, 14 Jan 2025 00:10:45 +0300 Subject: [PATCH] Update parse_gtdbtk.Snakefile Changed mmseqs gene clustering from easy-linclust to easy-cluster with coverage mode 1 (--cov-mode 1), so gene fragments do not end up as separate clusters. --- maginator/workflow/parse_gtdbtk.Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maginator/workflow/parse_gtdbtk.Snakefile b/maginator/workflow/parse_gtdbtk.Snakefile index f8f1165..57654c5 100644 --- a/maginator/workflow/parse_gtdbtk.Snakefile +++ b/maginator/workflow/parse_gtdbtk.Snakefile @@ -64,7 +64,7 @@ rule repres_genes: conda: "envs/filter_gtdbtk.yaml" shell: - "mmseqs easy-linclust --min-seq-id {params.seq_id} -c {params.cov} --threads {threads} {input} {params.out_prefix} {params.tmp_dir}; rm -r {params.tmp_dir};" + "mmseqs easy-cluster --cov-mode 1 --min-seq-id {params.seq_id} -c {params.cov} --threads {threads} {input} {params.out_prefix} {params.tmp_dir}; rm -r {params.tmp_dir};" # Add gene clusters to GTDB-tk data