Skip to content

Commit

Permalink
add final tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ahaessly committed Feb 18, 2020
1 parent 3d24082 commit 3f2937e
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 22 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
package org.broadinstitute.hellbender.tools.walkers.mutect.filtering;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.IndexRange;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,10 +292,10 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) {
filters.add(new TumorEvidenceFilter());
filters.add(new BaseQualityFilter(MTFAC.minMedianBaseQuality));
filters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength));
filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); // test
filters.add(new StrandArtifactFilter()); // test gvcf
filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount));
filters.add(new StrandArtifactFilter());
filters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate));
filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); // test gvcf
filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand));
filters.add(new ReadPositionFilter(MTFAC.minMedianReadPosition));
filters.add(new MinAlleleFractionFilter(MTFAC.minAf));

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
package org.broadinstitute.hellbender.tools.walkers.mutect.filtering;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.math3.util.CombinatoricsUtils;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils;
import org.broadinstitute.hellbender.tools.walkers.validation.basicshortmutpileup.BetaBinomialDistribution;
import org.broadinstitute.hellbender.utils.IndexRange;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.OptimizationUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
Expand Down Expand Up @@ -62,7 +60,7 @@ public List<EStep> calculateArtifactProbabilities(final VariantContext vc, final
}
// remove symbolic alleles
if (vc.hasSymbolicAlleles()) {
sbs = GATKVariantContextUtils.removeDataForSymbolicAltAlleles(vc, sbs);
sbs = GATKVariantContextUtils.removeDataForSymbolicAlleles(vc, sbs);
}

final List<Integer> indelSizes = vc.getAlternateAlleles().stream().map(alt -> Math.abs(vc.getReference().length() - alt.length())).collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.VariantContextGetters;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.util.Collections;
import java.util.List;
Expand All @@ -26,6 +26,10 @@ public List<Boolean> areAllelesArtifacts(final VariantContext vc, final Mutect2F
if (minReadsOnEachStrand == 0 || sbs == null || sbs.isEmpty() || sbs.size() <= 1) {
return Collections.emptyList();
}
// remove symbolic alleles
if (vc.hasSymbolicAlleles()) {
sbs = GATKVariantContextUtils.removeDataForSymbolicAlleles(vc, sbs);
}
// skip the reference
return sbs.subList(1, sbs.size()).stream().map(altList -> altList.stream().anyMatch(x -> x == 0)).collect(Collectors.toList());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2001,10 +2001,19 @@ public static boolean isUnmixedMnpIgnoringNonRef(final VariantContext vc) {
}

/**
 * Drops the entries of {@code data} that correspond to symbolic alternate alleles of {@code vc},
 * for data that carries one entry per alternate allele and therefore has no slot for the reference.
 *
 * @param vc   variant context whose symbolic alternate alleles select the entries to drop
 * @param data per-alt-allele values; position 0 is treated as the first alternate allele
 * @param <T>  element type of {@code data}
 * @return whatever the three-argument overload returns when told the data has no reference entry
 */
public static <T> List<T> removeDataForSymbolicAltAlleles(VariantContext vc, List<T> data) {
    // Alt-only data: allele indices must be shifted down by one to address it.
    final boolean dataContainsReference = false;
    return removeDataForSymbolicAlleles(vc, data, dataContainsReference);
}

/**
 * Drops the entries of {@code data} that correspond to symbolic alternate alleles of {@code vc},
 * for data that carries one entry per allele with the reference allele included.
 *
 * @param vc   variant context whose symbolic alternate alleles select the entries to drop
 * @param data per-allele values addressed directly by allele index (reference included)
 * @param <T>  element type of {@code data}
 * @return whatever the three-argument overload returns when told the data has a reference entry
 */
public static <T> List<T> removeDataForSymbolicAlleles(VariantContext vc, List<T> data) {
    // Reference-inclusive data: allele indices address it without any shift.
    final boolean dataContainsReference = true;
    return removeDataForSymbolicAlleles(vc, data, dataContainsReference);
}

protected static <T> List<T> removeDataForSymbolicAlleles(VariantContext vc, List<T> data, boolean dataContainsReference) {
if (vc.hasSymbolicAlleles()) {
List<Allele> symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList());
// convert allele index to alt allele index
List<Integer> symAltIndexes = vc.getAlleleIndices(symbolicAlleles).stream().map(i -> i-1).collect(Collectors.toList());
// convert allele index to index for data
int offset = dataContainsReference ? 0 : 1;
List<Integer> symAltIndexes = vc.getAlleleIndices(symbolicAlleles).stream().map(i -> i-offset).collect(Collectors.toList());
return removeItemsByIndex(data, symAltIndexes);
} else {
return data;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -528,18 +528,18 @@ public Object[][] vcfsForFiltering() {
)},
{NA12878_MITO_GVCF, .0009, 0.5, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList(
Collections.emptySet(),
ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME),
ImmutableSet.of(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME),
ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME),
ImmutableSet.of(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME,GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME),
Collections.emptySet(),
ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.CONTAMINATION_FILTER_NAME,
GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME,
GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME)),
GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME)),
Arrays.asList(
Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), //".",
Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //"weak_evidence, base_qual|.",
Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // "weak_evidence, possible_numt|.",
Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual, low_allele_frac|.",
Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // "weak_evidence, base_qual, map_qual, contamination, position, low_allele_frac, possible_numt|."
Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //"weak_evidence, base_qual, strand_bias|.",
Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // "weak_evidence, strand_bias, strict_strand, possible_numt|.",
Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual, strand_bias, low_allele_frac|.",
Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // "weak_evidence, base_qual, map_qual, contamination, strict_strand, position, low_allele_frac, possible_numt|."
)}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
##GVCFBlock0-5=minGQ=0(inclusive),maxGQ=5(exclusive)
##GVCFBlock10-2147483647=minGQ=10(inclusive),maxGQ=2147483647(exclusive)
##GVCFBlock5-10=minGQ=5(inclusive),maxGQ=10(exclusive)
##INFO=<ID=AS_SB_TABLE,Number=1,Type=String,Description="Allele-specific forward/reverse read counts for strand bias tests. Includes the reference and alleles separated by |.">
##INFO=<ID=SEQQ,Number=1,Type=Integer,Description="Phred-scaled qualities that alt allele are not due to sequencing error">
##INFO=<ID=CONTQ,Number=1,Type=Float,Description="Phred-scaled qualities that alt allele are not due to contamination">
##INFO=<ID=STRANDQ,Number=1,Type=Float,Description="Phred-scaled qualities that alt allele are not due to strand artifact">
Expand Down Expand Up @@ -155,9 +156,9 @@ MT 31 . C <NON_REF> . . END=31 GT:DP:MIN_DP:TLOD 0/0:1338:1338:-3.107e+00
MT 32 . A <NON_REF> . . END=34 GT:DP:MIN_DP:TLOD 0/0:1435:1360:-2.827e+00
MT 35 . G <NON_REF> . . END=35 GT:DP:MIN_DP:TLOD 0/0:1576:1576:-1.333e+00
MT 36 . G <NON_REF> . . END=36 GT:DP:MIN_DP:TLOD 0/0:1614:1614:-2.286e+00
MT 37 . A G,<NON_REF> . . DP=1594;ECNT=4;MBQ=27,6,0;MFRL=575,15999,0;MMQ=60,60,0;MPOS=7,0;POPAF=7.30,7.30;SAAF=0.010,0.010,7.242e-03;SAPP=3.734e-04,4.515e-03,0.995;TLOD=-2.556e+00,-2.886e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2:1508,11,0:1.092e-03,6.494e-04:1519:674,7,0:834,4,0
MT 37 . A G,<NON_REF> . . AS_SB_TABLE=1000,508|8,3|0,0;DP=1594;ECNT=4;MBQ=27,6,0;MFRL=575,15999,0;MMQ=60,60,0;MPOS=7,0;POPAF=7.30,7.30;SAAF=0.010,0.010,7.242e-03;SAPP=3.734e-04,4.515e-03,0.995;TLOD=-2.556e+00,-2.886e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2:1508,11,0:1.092e-03,6.494e-04:1519:674,7,0:834,4,0
MT 38 . G <NON_REF> . . END=39 GT:DP:MIN_DP:TLOD 0/0:1554:1534:-3.194e+00
MT 40 . T G,<NON_REF> . . DP=1709;ECNT=4;MBQ=28,21,0;MFRL=562,16148,0;MMQ=60,60,0;MPOS=27,0;POPAF=7.30,7.30;SAAF=0.010,0.00,5.851e-04;SAPP=1.826e-04,5.519e-04,0.999;TLOD=-1.650e+00,-2.932e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:1708,1,0:1.148e-03,5.841e-04:1709:766,1,0:942,0,0:0|1:40_T_G:40
MT 40 . T G,<NON_REF> . . AS_SB_TABLE=1000,708|1,0|0,0;DP=1709;ECNT=4;MBQ=28,21,0;MFRL=562,16148,0;MMQ=60,60,0;MPOS=27,0;POPAF=7.30,7.30;SAAF=0.010,0.00,5.851e-04;SAPP=1.826e-04,5.519e-04,0.999;TLOD=-1.650e+00,-2.932e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:1708,1,0:1.148e-03,5.841e-04:1709:766,1,0:942,0,0:0|1:40_T_G:40
MT 41 . C <NON_REF> . . END=42 GT:DP:MIN_DP:TLOD 0/0:1678:1657:-3.231e+00
MT 43 . C A,<NON_REF> . . DP=1833;ECNT=4;MBQ=27,27,0;MFRL=532,16148,0;MMQ=60,60,0;MPOS=30,0;POPAF=7.30,7.30;SAAF=0.010,0.00,5.456e-04;SAPP=1.650e-04,5.440e-04,0.999;TLOD=-1.710e+00,-2.963e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:1832,1,0:1.069e-03,5.447e-04:1833:819,1,0:1013,0,0:0|1:40_T_G:40
MT 44 . C <NON_REF> . . END=50 GT:DP:MIN_DP:TLOD 0/0:1904:1779:-3.311e+00
Expand Down Expand Up @@ -210,9 +211,9 @@ MT 132 . C <NON_REF> . . END=142 GT:DP:MIN_DP:TLOD 0/0:5872:5676:-3.776e+00
MT 143 . G A,C,<NON_REF> . . DP=6192;ECNT=17;MBQ=29,29,2,0;MFRL=456,8242,543,0;MMQ=60,60,60,0;MPOS=37,12,0;POPAF=7.30,7.30,7.30;SAAF=0.010,0.010,9.870e-04;SAPP=1.829e-04,1.642e-04,1.000;TLOD=-2.341e+00,-3.195e+00,-3.309e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2/3:6073,6,7,0:6.092e-04,2.090e-04,1.635e-04:6086:2878,2,3,0:3195,4,4,0
MT 144 . C <NON_REF> . . END=150 GT:DP:MIN_DP:TLOD 0/0:6275:6144:-3.763e+00
MT 151 . C A,<NON_REF> . . DP=6495;ECNT=17;MBQ=28,13,0;MFRL=440,8341,0;MMQ=60,60,0;MPOS=14,0;POPAF=7.30,7.30;SAAF=0.010,0.010,6.260e-04;SAPP=1.238e-04,1.191e-04,1.000;TLOD=-3.505e+00,-3.507e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2:6386,4,0:1.561e-04,1.557e-04:6390:2990,2,0:3396,2,0
MT 152 . T C,A,<NON_REF> . . DP=6523;ECNT=17;MBQ=29,29,13,0;MFRL=291,438,16046,0;MMQ=60,60,60,0;MPOS=37,23,0;POPAF=7.30,7.30,7.30;SAAF=0.990,0.990,0.999;SAPP=0.028,0.024,0.948;TLOD=20915.39,-3.329e+00,-3.333e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2/3:7,6417,8,0:0.999,1.562e-04,1.549e-04:6432:4,3002,2,0:3,3415,6,0
MT 152 . T C,A,<NON_REF> . . AS_SB_TABLE=5,2|3487,3000|6,2|0,0;DP=6523;ECNT=17;MBQ=29,29,13,0;MFRL=291,438,16046,0;MMQ=60,60,60,0;MPOS=37,23,0;POPAF=7.30,7.30,7.30;SAAF=0.990,0.990,0.999;SAPP=0.028,0.024,0.948;TLOD=20915.39,-3.329e+00,-3.333e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2/3:7,6417,8,0:0.999,1.562e-04,1.549e-04:6432:4,3002,2,0:3,3415,6,0
MT 153 . A <NON_REF> . . END=156 GT:DP:MIN_DP:TLOD 0/0:6486:6477:-3.814e+00
MT 157 . T A,<NON_REF> . . DP=6625;ECNT=17;MBQ=29,0,0;MFRL=429,0,0;MMQ=60,0,0;MPOS=0,0;POPAF=7.30,7.30;SAAF=0.00,0.00,0.00;SAPP=4.826e-05,4.803e-05,1.000;TLOD=-3.517e+00,-3.517e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:6546,0,0:1.519e-04,1.519e-04:6546:3057,0,0:3489,0,0:0|1:157_T_A:157
MT 157 . T A,<NON_REF> . . AS_SB_TABLE=546,6|0,0|0,0;DP=6625;ECNT=17;MBQ=29,0,0;MFRL=429,0,0;MMQ=60,0,0;MPOS=0,0;POPAF=7.30,7.30;SAAF=0.00,0.00,0.00;SAPP=4.826e-05,4.803e-05,1.000;TLOD=-3.517e+00,-3.517e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:6546,0,0:1.519e-04,1.519e-04:6546:3057,0,0:3489,0,0:0|1:157_T_A:157
MT 158 . T <NON_REF> . . END=162 GT:DP:MIN_DP:TLOD 0/0:6523:6483:-3.765e+00
MT 163 . G GCA,<NON_REF> . . DP=6669;ECNT=17;MBQ=25,0,0;MFRL=419,0,0;MMQ=60,0,0;MPOS=0,0;POPAF=7.30,7.30;SAAF=0.00,0.00,0.00;SAPP=4.745e-05,4.832e-05,1.000;TLOD=-3.520e+00,-3.520e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:6582,0,0:1.510e-04,1.510e-04:6582:3078,0,0:3504,0,0:0|1:157_T_A:157
MT 164 . C <NON_REF> . . END=165 GT:DP:MIN_DP:TLOD 0/0:6517:6510:-3.807e+00
Expand Down
Binary file not shown.

0 comments on commit 3f2937e

Please sign in to comment.