Skip to content

Commit

Permalink
Hot fix when using variant mapping with bgen files
Browse files Browse the repository at this point in the history
I changed the logic flow to check the whitelist first and then do the variant mapping. This is because, when a variant mapping file is used to map between the genotype SNPs and the model SNPs, varid gets updated to varid_ whenever there is a match, so varid takes a new id that no longer matches the ids in the whitelist. With this update, we ensure that this change does not affect the selection at the whitelisting step.
  • Loading branch information
Fnyasimi authored Oct 31, 2024
1 parent 4b3ad95 commit e7ecd1e
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions software/metax/genotype/BGENGenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ def bgen_file_geno_lines(file, variant_mapping = None, force_colon = False, use_
if chr == "NA" or pos == "NA":
continue

if whitelist and not varid in whitelist:
continue

if variant_mapping:
if dict_mapping:
if not varid in variant_mapping:
Expand All @@ -48,10 +51,6 @@ def bgen_file_geno_lines(file, variant_mapping = None, force_colon = False, use_
# the alleles in the genotype might be swapped respect the variant in the mapping
# You should verify if you must match it


if whitelist and not varid in whitelist:
continue

v = bgen["genotype"][variant.Index].compute()
if v["phased"]:
d = numpy.apply_along_axis(lambda x: x[1] + x[3], 1, numpy.array(v["probs"], dtype=float))
Expand All @@ -74,4 +73,4 @@ def get_samples(path):
bgen = bgen_reader.read_bgen(path, verbose=False)
samples = bgen["samples"].values
samples = pandas.DataFrame({"FID":samples, "IID":samples})[["FID", "IID"]]
return samples
return samples

0 comments on commit e7ecd1e

Please sign in to comment.