From e7ecd1e1bd5d07490c0df2cc05dd407f3eba767d Mon Sep 17 00:00:00 2001
From: Fnyasimi <41294948+Fnyasimi@users.noreply.github.com>
Date: Thu, 31 Oct 2024 12:02:11 -0500
Subject: [PATCH] Hot fix when using variant mapping with bgen files

I changed the logic flow to check the whitelist first and then do the variant mapping. When a variant mapping file is used to map between the genotype SNPs and the model SNPs, varid gets updated to varid_ when there is a match, and varid takes a new ID that doesn't match the IDs in the whitelist. With this update, we ensure that this change does not affect the selection at the whitelisting step.
---
 software/metax/genotype/BGENGenotype.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/software/metax/genotype/BGENGenotype.py b/software/metax/genotype/BGENGenotype.py
index b1c2f40..abc625f 100644
--- a/software/metax/genotype/BGENGenotype.py
+++ b/software/metax/genotype/BGENGenotype.py
@@ -35,6 +35,9 @@ def bgen_file_geno_lines(file, variant_mapping = None, force_colon = False, use_
             if chr == "NA" or pos == "NA":
                 continue
 
+        if whitelist and not varid in whitelist:
+            continue
+        
         if variant_mapping:
             if dict_mapping:
                 if not varid in variant_mapping:
@@ -48,10 +51,6 @@ def bgen_file_geno_lines(file, variant_mapping = None, force_colon = False, use_
         # the alleles in the genotype might be swapped respect the variant in the mapping
         # You should verify if you must match it
 
-
-        if whitelist and not varid in whitelist:
-            continue
-
         v = bgen["genotype"][variant.Index].compute()
         if v["phased"]:
             d = numpy.apply_along_axis(lambda x: x[1] + x[3], 1, numpy.array(v["probs"], dtype=float))
@@ -74,4 +73,4 @@ def get_samples(path):
     bgen = bgen_reader.read_bgen(path, verbose=False)
     samples = bgen["samples"].values
     samples = pandas.DataFrame({"FID":samples, "IID":samples})[["FID", "IID"]]
-    return samples
\ No newline at end of file
+    return samples