Skip to content

Commit

Permalink
parsing maxambiguity and minlength options as flagged in issue #405
Browse files — browse the repository at this point in the history
  • Loading branch information
aineniamh committed Apr 6, 2022
1 parent 65a6787 commit 10254b4
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pangolin/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def main(sysargs = sys.argv[1:]):
config[KEY_OUTFILE] = io.set_up_outfile(args.outfile, config[KEY_OUTFILE],config[KEY_OUTDIR])
io.set_up_tempdir(args.tempdir,args.no_temp,cwd,config[KEY_OUTDIR], config)
config[KEY_ALIGNMENT_FILE],config[KEY_ALIGNMENT_OUT] = io.parse_alignment_options(args.alignment, config[KEY_OUTDIR], config[KEY_TEMPDIR],args.alignment_file, config[KEY_ALIGNMENT_FILE])

parse_qc_thresholds(args.maxambig, args.minlen, config[KEY_REFERENCE_FASTA], config)
config[KEY_QUERY_FASTA] = io.find_query_file(cwd, config[KEY_TEMPDIR], args.query)

io.quick_check_query_file(cwd, args.query, config[KEY_QUERY_FASTA])
Expand Down
1 change: 0 additions & 1 deletion pangolin/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

# String keys used to index the shared config dictionary.
KEY_DATADIR = "datadir"

# Quality-control thresholds.
KEY_MINLEN = "minlen"
KEY_MAXAMBIG = "maxambig"

# Alignment trim coordinates.
KEY_TRIM_START = "trim_start"
KEY_TRIM_END = "trim_end"
Expand Down
31 changes: 30 additions & 1 deletion pangolin/utils/initialising.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import itertools
from distutils.version import LooseVersion
from Bio import SeqIO

import pangolin.utils.custom_logger as custom_logger
from pangolin.utils.log_colours import green,cyan
Expand Down Expand Up @@ -33,7 +34,6 @@ def setup_config_dict(cwd):

KEY_DATADIR:None,

KEY_MINLEN: 25000,
KEY_MAXAMBIG: 0.3,
KEY_TRIM_START:265, # where to pad to using datafunk
KEY_TRIM_END:29674, # where to pad after using datafunk
Expand Down Expand Up @@ -176,6 +176,35 @@ def setup_data(datadir_arg,analysis_mode, config):
config[KEY_DATADIR] = datadir
config[KEY_CONSTELLATION_FILES] = constellation_files

def parse_qc_thresholds(maxambig, minlen, reference_fasta, config):
    """Validate the QC options and record the maximum ambiguity in ``config``.

    Args:
        maxambig: Value of ``--max-ambiguity``; converted with ``float`` and
            required to lie in the closed interval [0, 1]. Skipped when falsy.
        minlen: Value of ``--min-length``; converted with ``float`` and
            translated into an equivalent maximum-ambiguity fraction relative
            to the reference length. Skipped when falsy.
        reference_fasta: Path (or handle) of the reference FASTA; the length
            of the last record it contains is used as the reference length.
        config: Config dictionary, updated in place under ``KEY_MAXAMBIG``.
            It must already hold a ``KEY_MAXAMBIG`` entry, because that entry
            is read for the comparison and for the final report.

    Exits the process with status -1 (after writing to stderr) when
    ``maxambig`` is outside [0, 1] or ``minlen`` exceeds the reference length.
    """
    # NOTE(review): the truthiness checks below mean an explicit numeric 0 is
    # treated as "not supplied" — confirm that is intended for both options.
    if maxambig:
        maxambig = float(maxambig)
        if 0 <= maxambig <= 1:
            config[KEY_MAXAMBIG] = maxambig
        else:
            sys.stderr.write(cyan('Error: `--max-ambiguity` should be a float between 0 and 1.\n'))
            sys.exit(-1)

    if minlen:
        minlen = float(minlen)

        # The reference length is taken from the last record in the file.
        reflen = 0
        for record in SeqIO.parse(reference_fasta, "fasta"):
            reflen = len(record)

        if minlen > reflen:
            # Fixed: the message previously interpolated the undefined name
            # `ref_len`, raising NameError instead of reporting the problem.
            sys.stderr.write(cyan(f'Error: `--min-length` should be less than the length of the reference: {reflen}.\n'))
            sys.exit(-1)

        # A minimum length of L over a reference of length R corresponds to
        # at most (1 - L/R) of the sequence being ambiguous.
        new_maxambig = round(1 - (minlen / reflen), 3)
        print(f"Converting minimum length of {minlen} to maximum ambiguity of {new_maxambig}.")
        # NOTE(review): the larger (more permissive) of the two thresholds
        # wins here — confirm that this is the intended direction.
        if new_maxambig > config[KEY_MAXAMBIG]:
            config[KEY_MAXAMBIG] = new_maxambig

    print(green(f"Maximum ambiguity allowed is {config[KEY_MAXAMBIG]}.\n****"))




def print_alias_file_exit(alias_file):
with open(alias_file, 'r') as handle:
Expand Down
1 change: 1 addition & 0 deletions pangolin/utils/io_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from pangolin.utils.config import *


def find_query_file(cwd, tempdir, query_arg):
if len(query_arg) > 1:
print(cyan(f"Error: Too many query (input) fasta files supplied: {query_arg}\nPlease supply one only."))
Expand Down

0 comments on commit 10254b4

Please sign in to comment.