Skip to content

Commit

Permalink
Run validation on SV get_ht
Browse files: browse the repository at this point in the history
  • Loading branch information
jklugherz committed Mar 5, 2025
1 parent 0addb5b commit 5412715
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 6 deletions.
1 change: 1 addition & 0 deletions v03_pipeline/lib/annotations/sv.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def gnomad_svs(
gnomad_svs_ht: hl.Table,
**_: Any,
) -> hl.Expression:
gnomad_svs_ht = gnomad_svs_ht.drop('locus', 'alleles')
return gnomad_svs_ht.annotate(
ID=gnomad_svs_ht.KEY,
)[ht['info.GNOMAD_V4.1_TRUTH_VID']]
Expand Down
3 changes: 0 additions & 3 deletions v03_pipeline/lib/misc/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ def validate_allele_type(
**_: Any,
) -> None:
ht = t.rows() if isinstance(t, hl.MatrixTable) else t
if not hasattr(ht, 'alleles'):
return

ht = ht.filter(
dataset_type.invalid_allele_types.contains(
hl.numeric_allele_type(ht.alleles[0], ht.alleles[1]),
Expand Down
3 changes: 1 addition & 2 deletions v03_pipeline/lib/reference_datasets/gnomad_svs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,4 @@ def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
N_HET=ht.info.N_HET,
N_HOMREF=ht.info.N_HOMREF,
)
ht = ht.key_by('KEY')
return ht.drop('locus', 'alleles')
return ht.key_by('KEY')
30 changes: 30 additions & 0 deletions v03_pipeline/lib/reference_datasets/gnomad_svs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht):
[
hl.Struct(
KEY='gnomAD-SV_v3_BND_chr1_1a45f73a',
locus=hl.Locus(
contig='chr1',
position=10434,
reference_genome=ReferenceGenome.GRCh38,
),
alleles=['N', '<BND>'],
AF=0.11413399875164032,
AC=8474,
AN=74246,
Expand All @@ -29,6 +35,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht):
),
hl.Struct(
KEY='gnomAD-SV_v3_BND_chr1_3fa36917',
locus=hl.Locus(
contig='chr1',
position=10440,
reference_genome=ReferenceGenome.GRCh38,
),
alleles=['N', '<BND>'],
AF=0.004201000090688467,
AC=466,
AN=110936,
Expand All @@ -37,6 +49,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht):
),
hl.Struct(
KEY='gnomAD-SV_v3_BND_chr1_7bbf34b5',
locus=hl.Locus(
contig='chr1',
position=10464,
reference_genome=ReferenceGenome.GRCh38,
),
alleles=['N', '<BND>'],
AF=0.03698499873280525,
AC=3119,
AN=84332,
Expand All @@ -45,6 +63,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht):
),
hl.Struct(
KEY='gnomAD-SV_v3_BND_chr1_933a2971',
locus=hl.Locus(
contig='chr1',
position=10450,
reference_genome=ReferenceGenome.GRCh38,
),
alleles=['N', '<BND>'],
AF=0.3238990008831024,
AC=21766,
AN=67200,
Expand All @@ -53,6 +77,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht):
),
hl.Struct(
KEY='gnomAD-SV_v3_DUP_chr1_01c2781c',
locus=hl.Locus(
contig='chr1',
position=10000,
reference_genome=ReferenceGenome.GRCh38,
),
alleles=['N', '<DUP>'],
AF=0.0019970000721514225,
AC=139,
AN=69594,
Expand Down
2 changes: 1 addition & 1 deletion v03_pipeline/lib/reference_datasets/reference_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def get_ht(
ht = filter_contigs(ht, reference_genome)
for dataset_type in self.dataset_types(reference_genome):
validate_allele_type(ht, dataset_type)
validate_no_duplicate_variants(ht, reference_genome, DatasetType.SNV_INDEL)
validate_no_duplicate_variants(ht, reference_genome, dataset_type)
# NB: we do not filter with "filter" here
# ReferenceDatasets are DatasetType agnostic and that
# filter is only used at annotation time.
Expand Down

0 comments on commit 5412715

Please sign in to comment.