From e7ecd1e1bd5d07490c0df2cc05dd407f3eba767d Mon Sep 17 00:00:00 2001 From: Fnyasimi <41294948+Fnyasimi@users.noreply.github.com> Date: Thu, 31 Oct 2024 12:02:11 -0500 Subject: [PATCH] Hot fix when using variant mapping with bgen files I changed the logic flow to check the whitelist first and then do the variant mapping. This is because, if we are using a variant mapping file to map between the genotype SNPs and the model SNPs, the varid gets updated to varid_ when there is a match, and varid takes a new id which doesn't match the ids in the whitelist. With this update we ensure that this change of id does not affect the selection at the whitelisting step. --- software/metax/genotype/BGENGenotype.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/software/metax/genotype/BGENGenotype.py b/software/metax/genotype/BGENGenotype.py index b1c2f40..abc625f 100644 --- a/software/metax/genotype/BGENGenotype.py +++ b/software/metax/genotype/BGENGenotype.py @@ -35,6 +35,9 @@ def bgen_file_geno_lines(file, variant_mapping = None, force_colon = False, use_ if chr == "NA" or pos == "NA": continue + if whitelist and not varid in whitelist: + continue + if variant_mapping: if dict_mapping: if not varid in variant_mapping: @@ -48,10 +51,6 @@ def bgen_file_geno_lines(file, variant_mapping = None, force_colon = False, use_ # the alleles in the genotype might be swapped respect the variant in the mapping # You should verify if you must match it - - if whitelist and not varid in whitelist: - continue - v = bgen["genotype"][variant.Index].compute() if v["phased"]: d = numpy.apply_along_axis(lambda x: x[1] + x[3], 1, numpy.array(v["probs"], dtype=float)) @@ -74,4 +73,4 @@ def get_samples(path): bgen = bgen_reader.read_bgen(path, verbose=False) samples = bgen["samples"].values samples = pandas.DataFrame({"FID":samples, "IID":samples})[["FID", "IID"]] - return samples \ No newline at end of file + return samples