Skip to content

Commit

Permalink
v1.0.1 (#18)
Browse files: browse the repository at this point in the history
* Fix FID+IID filtering, simplify based on tuples.

* Delete unused class

* Delete unused class 2

* Bump version

* Extra comments for clarity

* Bump patch version instead

---------

Co-authored-by: Benjamin Wingfield <[email protected]>
  • Loading branch information
smlmbrt and nebfield authored Aug 8, 2024
1 parent 6c62aa5 commit 2c6af1f
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 44 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "fraposa-pgsc"
version = "1.0.0"
version = "1.0.1"
description = "Tools to perform ancestry projection to a reference dataset within the calculator pipeline (pgsc_calc)"
homepage = "https://github.com/PGScatalog/fraposa_pgsc"
authors = ["smlmbrt <[email protected]>"]
Expand Down
22 changes: 10 additions & 12 deletions src/fraposa_pgsc/fraposa.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from sklearn.utils.extmath import randomized_svd
from typing import Union

from .sampleid import SampleID

def create_logger(out_filepref='fraposa'):
log = logging.getLogger()
Expand Down Expand Up @@ -139,25 +138,24 @@ def read_bed(bed_filepref, dtype=np.int8, filt_iid=None):
n = len(fam)

if filt_iid:
fam_ids = set(SampleID(x, y) for x, y in zip(fam['fid'], fam['iid'], strict=True))
matched_ids = filt_iid.intersection(fam_ids)
if len(matched_ids) == 0:
fam_ids = list(zip(fam['fid'], fam['iid'])) # create tuples of ids from genotyping files
fam_mask = [x in filt_iid for x in fam_ids] # T/F overlap of genotype data and filter IDs (tuples)
n_matched = sum(fam_mask)
if n_matched == 0:
raise ValueError(f"ERROR: 0 / {len(filt_iid)} ids in filter list match the study dataset")
elif len(fam_ids) != len(fam):
elif len(set(fam_ids)) != len(fam):
raise ValueError("Samples with duplicated FID + IID detected, please remove and retry")

bed = np.zeros(shape=(p, len(matched_ids)), dtype=dtype)
# in will call SampleID's __hash__ method which uses (fid, iid)
fam_mask = pd.Series((x in matched_ids for x in fam_ids), dtype=bool)
i_extract = np.where(fam_mask == True)
bed = np.zeros(shape=(p, n_matched), dtype=dtype)
i_extract = [i for i,x in enumerate(fam_mask) if x is True] # idx to extract from genotype matrix
for (i, (snp, genotypes)) in enumerate(pyp):
bed[i,:] = genotypes[i_extract]
fam = fam.loc[fam_mask,:]
if len(matched_ids) < len(filt_iid):
logging.warning('Warning: only {} / {} ids in filter list match the study dataset'.format(len(matched_ids),
if n_matched < len(filt_iid):
logging.warning('Warning: only {} / {} ids in filter list match the study dataset'.format(n_matched,
len(filt_iid)))
else:
logging.info('Extracted {} samples from study genotyping data'.format(len(matched_ids)))
logging.info('Extracted {} samples from study genotyping data'.format(n_matched))
else:
bed = np.zeros(shape=(p, n), dtype=dtype)
for (i, (snp, genotypes)) in enumerate(pyp):
Expand Down
14 changes: 11 additions & 3 deletions src/fraposa_pgsc/fraposa_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import csv

import fraposa_pgsc.fraposa as fp
from fraposa_pgsc.sampleid import SampleID
import argparse


Expand Down Expand Up @@ -38,8 +37,17 @@ def main():

try:
with open(args.stu_filt_iid) as f:
reader = csv.reader(f, delimiter="\t")
stu_filt_iid = set(SampleID(x[0], x[1]) for x in list(reader))
reader = csv.reader(f, delimiter="\t") # reads columns as str
stu_filt_iid = []
for x in reader:
if x[0] == "0":
stu_filt_iid.append((x[1],x[1])) # replace missing FID with IID
else:
stu_filt_iid.append((x[0], x[1])) # return FID, IID
l_input = len(stu_filt_iid)
stu_filt_iid = set(stu_filt_iid)
if l_input != len(stu_filt_iid):
raise ValueError("Duplicate IDs found in filter list")
except TypeError:
stu_filt_iid = None
except IndexError:
Expand Down
28 changes: 0 additions & 28 deletions src/fraposa_pgsc/sampleid.py

This file was deleted.

0 comments on commit 2c6af1f

Please sign in to comment.