diff --git a/v03_pipeline/lib/annotations/sv.py b/v03_pipeline/lib/annotations/sv.py index 278c045eb..868232a2f 100644 --- a/v03_pipeline/lib/annotations/sv.py +++ b/v03_pipeline/lib/annotations/sv.py @@ -180,6 +180,7 @@ def gnomad_svs( gnomad_svs_ht: hl.Table, **_: Any, ) -> hl.Expression: + gnomad_svs_ht = gnomad_svs_ht.drop('locus', 'alleles') return gnomad_svs_ht.annotate( ID=gnomad_svs_ht.KEY, )[ht['info.GNOMAD_V4.1_TRUTH_VID']] diff --git a/v03_pipeline/lib/misc/validation.py b/v03_pipeline/lib/misc/validation.py index 234f11edc..063312f47 100644 --- a/v03_pipeline/lib/misc/validation.py +++ b/v03_pipeline/lib/misc/validation.py @@ -31,9 +31,6 @@ def validate_allele_type( **_: Any, ) -> None: ht = t.rows() if isinstance(t, hl.MatrixTable) else t - if not hasattr(ht, 'alleles'): - return - ht = ht.filter( dataset_type.invalid_allele_types.contains( hl.numeric_allele_type(ht.alleles[0], ht.alleles[1]), diff --git a/v03_pipeline/lib/reference_datasets/gnomad_svs.py b/v03_pipeline/lib/reference_datasets/gnomad_svs.py index 5743de3c4..114a45f3b 100644 --- a/v03_pipeline/lib/reference_datasets/gnomad_svs.py +++ b/v03_pipeline/lib/reference_datasets/gnomad_svs.py @@ -14,5 +14,4 @@ def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table: N_HET=ht.info.N_HET, N_HOMREF=ht.info.N_HOMREF, ) - ht = ht.key_by('KEY') - return ht.drop('locus', 'alleles') + return ht.key_by('KEY') diff --git a/v03_pipeline/lib/reference_datasets/gnomad_svs_test.py b/v03_pipeline/lib/reference_datasets/gnomad_svs_test.py index 8cc1dd90c..ff24aa3d1 100644 --- a/v03_pipeline/lib/reference_datasets/gnomad_svs_test.py +++ b/v03_pipeline/lib/reference_datasets/gnomad_svs_test.py @@ -21,6 +21,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht): [ hl.Struct( KEY='gnomAD-SV_v3_BND_chr1_1a45f73a', + locus=hl.Locus( + contig='chr1', + position=10434, + reference_genome=ReferenceGenome.GRCh38, + ), + alleles=['N', ''], AF=0.11413399875164032, AC=8474, AN=74246, @@ -29,6 +35,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht): ), hl.Struct( KEY='gnomAD-SV_v3_BND_chr1_3fa36917', + locus=hl.Locus( + contig='chr1', + position=10440, + reference_genome=ReferenceGenome.GRCh38, + ), + alleles=['N', ''], AF=0.004201000090688467, AC=466, AN=110936, @@ -37,6 +49,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht): ), hl.Struct( KEY='gnomAD-SV_v3_BND_chr1_7bbf34b5', + locus=hl.Locus( + contig='chr1', + position=10464, + reference_genome=ReferenceGenome.GRCh38, + ), + alleles=['N', ''], AF=0.03698499873280525, AC=3119, AN=84332, @@ -45,6 +63,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht): ), hl.Struct( KEY='gnomAD-SV_v3_BND_chr1_933a2971', + locus=hl.Locus( + contig='chr1', + position=10450, + reference_genome=ReferenceGenome.GRCh38, + ), + alleles=['N', ''], AF=0.3238990008831024, AC=21766, AN=67200, @@ -53,6 +77,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht): ), hl.Struct( KEY='gnomAD-SV_v3_DUP_chr1_01c2781c', + locus=hl.Locus( + contig='chr1', + position=10000, + reference_genome=ReferenceGenome.GRCh38, + ), + alleles=['N', ''], AF=0.0019970000721514225, AC=139, AN=69594, diff --git a/v03_pipeline/lib/reference_datasets/reference_dataset.py b/v03_pipeline/lib/reference_datasets/reference_dataset.py index d5add22e7..d2adab683 100644 --- a/v03_pipeline/lib/reference_datasets/reference_dataset.py +++ b/v03_pipeline/lib/reference_datasets/reference_dataset.py @@ -151,7 +151,7 @@ def get_ht( ht = filter_contigs(ht, reference_genome) for dataset_type in self.dataset_types(reference_genome): validate_allele_type(ht, dataset_type) - validate_no_duplicate_variants(ht, reference_genome, DatasetType.SNV_INDEL) + validate_no_duplicate_variants(ht, reference_genome, dataset_type) # NB: we do not filter with "filter" here # ReferenceDatasets are DatasetType agnostic and that # filter is only used at annotation time.