From f9133abae6e5afd5bd0a5dfcddd0532aed682fc9 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 19 Nov 2019 12:01:43 -0500 Subject: [PATCH 01/85] initial impl --- .../mutect/filtering/M2FiltersArgumentCollection.java | 5 +++++ .../walkers/mutect/filtering/Mutect2FilteringEngine.java | 2 +- .../walkers/mutect/filtering/PolymorphicNuMTFilter.java | 8 ++++---- .../hellbender/utils/variant/GATKVCFConstants.java | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java index e25843bb81a..561ffc2ab63 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java @@ -59,6 +59,7 @@ public class M2FiltersArgumentCollection { public static final String MIN_READS_ON_EACH_STRAND_LONG_NAME = "min-reads-per-strand"; public static final String MAX_NUMT_FRACTION_LONG_NAME = "max-numt-fraction"; public static final String MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME = "autosomal-coverage"; + public static final String MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME = "max-numt-autosomal-copies"; public static final String MIN_AF_LONG_NAME = "min-allele-fraction"; private static final int DEFAULT_MAX_EVENTS_IN_REGION = 2; @@ -72,6 +73,7 @@ public class M2FiltersArgumentCollection { private static final int DEFAULT_MIN_READS_ON_EACH_STRAND = 0; private static final double DEFAULT_MAX_NUMT_FRACTION = 0.85; private static final double DEFAULT_MEDIAN_AUTOSOMAL_COVERAGE = 0; + private static final double DEFAULT_MAX_NUMT_AUTOSOMAL_COPIES = 4; private static final double DEFAULT_MIN_AF = 0; @Argument(fullName = MAX_EVENTS_IN_REGION_LONG_NAME, optional = true, doc = "Maximum events in a single assembly 
region. Filter all variants if exceeded.") @@ -104,6 +106,9 @@ public class M2FiltersArgumentCollection { @Argument(fullName = MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, optional = true, doc = "Median autosomal coverage for filtering potential polymporphic NuMTs when calling on mitochondria.") public double medianAutosomalCoverage = DEFAULT_MEDIAN_AUTOSOMAL_COVERAGE; + @Argument(fullName = MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME, optional = true, doc = "Max expected NUMT copies in autosome used for filtering potential polymporphic NuMTs when calling on mitochondria.") + public double maxNuMTAutosomalCopies = DEFAULT_MAX_NUMT_AUTOSOMAL_COPIES; + @Argument(fullName = MAX_NUMT_FRACTION_LONG_NAME, doc="Maximum fraction of alt reads that originally aligned outside the mitochondria. These are due to NuMTs.", optional = true) public double maxNuMTFraction = DEFAULT_MAX_NUMT_FRACTION; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 2c60da7dac6..d7006ef67db 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -236,7 +236,7 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { if (MTFAC.mitochondria) { filters.add(new ChimericOriginalAlignmentFilter(MTFAC.maxNuMTFraction)); - filters.add(new PolymorphicNuMTFilter(MTFAC.medianAutosomalCoverage)); + filters.add(new PolymorphicNuMTFilter(MTFAC.maxNuMTAutosomalCopies)); } else { filters.add(new ClusteredEventsFilter(MTFAC.maxEventsInRegion)); filters.add(new MultiallelicFilter(MTFAC.numAltAllelesThreshold)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymorphicNuMTFilter.java 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymorphicNuMTFilter.java index 6ff8fd089e3..72f04e48c0b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymorphicNuMTFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymorphicNuMTFilter.java @@ -13,12 +13,12 @@ public class PolymorphicNuMTFilter extends HardFilter { private static final double LOWER_BOUND_PROB = .01; - private static final double MULTIPLE_COPIES_MULTIPLIER = 1.5; +// private static final double MULTIPLE_COPIES_MULTIPLIER = 2.0; private final int maxAltDepthCutoff; - public PolymorphicNuMTFilter(final double medianAutosomalCoverage){ - if (medianAutosomalCoverage != 0) { - final PoissonDistribution autosomalCoverage = new PoissonDistribution(medianAutosomalCoverage * MULTIPLE_COPIES_MULTIPLIER); + public PolymorphicNuMTFilter(final double maxNuMTCopies){ + if (maxNuMTCopies != 0) { + final PoissonDistribution autosomalCoverage = new PoissonDistribution(maxNuMTCopies); maxAltDepthCutoff = autosomalCoverage.inverseCumulativeProbability(1 - LOWER_BOUND_PROB); } else { maxAltDepthCutoff = 0; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java index 2c185108a48..da679e4227d 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java @@ -163,7 +163,7 @@ their names (or descriptions) depend on some threshold. 
Those filters are not i public final static String N_RATIO_FILTER_NAME = "n_ratio"; public final static String CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME = "numt_chimera"; //mitochondria public final static String ALLELE_FRACTION_FILTER_NAME = "low_allele_frac"; - public static final String POTENTIAL_POLYMORPHIC_NUMT_FILTER_NAME = "numt_novel"; + public static final String POTENTIAL_POLYMORPHIC_NUMT_FILTER_NAME = "possible_numt"; public static final List MUTECT_FILTER_NAMES = Arrays.asList(VCFConstants.PASSES_FILTERS_v4, POLYMERASE_SLIPPAGE, PON_FILTER_NAME, CLUSTERED_EVENTS_FILTER_NAME, TUMOR_EVIDENCE_FILTER_NAME, GERMLINE_RISK_FILTER_NAME, From 49875a679a2fd98af6d7e4dd28a657cbf75e8458 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 19 Nov 2019 13:04:30 -0500 Subject: [PATCH 02/85] start impl new refactor adding allele specific filters in mutect2 fix headers AS filter status working - still need to fix site filter --- .../mutect/filtering/ErrorProbabilities.java | 16 +++-- .../mutect/filtering/FilterMutectCalls.java | 5 ++ .../filtering/FilteringOutputStats.java | 2 + .../mutect/filtering/HardAlleleFilter.java | 23 +++++++ .../M2FiltersArgumentCollection.java | 6 +- .../mutect/filtering/Mutect2AlleleFilter.java | 69 +++++++++++++++++++ .../mutect/filtering/Mutect2Filter.java | 43 ++++++++++++ .../filtering/Mutect2FilteringEngine.java | 58 +++++++++++++++- .../filtering/Mutect2VariantFilter.java | 43 +----------- .../walkers/mutect/filtering/NuMTFilter.java | 57 +++++++++++++++ .../filtering/PolymorphicNuMTFilter.java | 51 -------------- .../utils/variant/GATKVCFConstants.java | 6 +- .../utils/variant/GATKVCFHeaderLines.java | 2 +- .../mutect/Mutect2IntegrationTest.java | 24 ++++--- 14 files changed, 290 insertions(+), 115 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java create mode 100644 
src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java delete mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymorphicNuMTFilter.java diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index 71732c703ea..23461568caa 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; +import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; @@ -7,18 +8,24 @@ import java.util.EnumMap; import java.util.List; import java.util.Map; +import java.util.function.Function; import java.util.stream.Collectors; public final class ErrorProbabilities { private final Map probabilitiesByFilter; + private final Map, Map> probabilitiesByFilterAndAllele; private final EnumMap probabilitiesByType; private final double errorProbability; - public ErrorProbabilities(final List filters, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, final ReferenceContext referenceContext) { - probabilitiesByFilter = filters.stream().collect(Collectors.toMap(f -> f, f -> f.errorProbability(vc, filteringEngine, referenceContext))); + public ErrorProbabilities(final List variantFilters, final List> alleleFilters, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, final 
ReferenceContext referenceContext) { + probabilitiesByFilter = variantFilters.stream().collect(Collectors.toMap(Function.identity(), f -> f.errorProbability(vc, filteringEngine, referenceContext))); + probabilitiesByFilterAndAllele = alleleFilters.stream().collect(Collectors.toMap(Function.identity(), f -> f.errorProbability(vc, filteringEngine, referenceContext))); probabilitiesByType = Arrays.stream(ErrorType.values()).collect(Collectors.toMap(v -> v, v -> 0.0, (a,b) -> a, () -> new EnumMap<>(ErrorType.class))); - filters.forEach(f -> probabilitiesByType.compute(f.errorType(), (type,prob) -> Math.max(prob, probabilitiesByFilter.get(f)))); + variantFilters.forEach(f -> probabilitiesByType.compute(f.errorType(), (type,prob) -> Math.max(prob, probabilitiesByFilter.get(f)))); + alleleFilters.forEach(f -> probabilitiesByType.compute(f.errorType(), (type,prob) -> + Math.max(prob, + probabilitiesByFilterAndAllele.get(f).values().stream().max(Double::compare).get()))); // treat errors of different types as independent double trueProbability = 1; @@ -33,6 +40,5 @@ public ErrorProbabilities(final List filters, final Varian public double getTechnicalArtifactProbability() { return probabilitiesByType.get(ErrorType.ARTIFACT); } public double getNonSomaticProbability() { return probabilitiesByType.get(ErrorType.NON_SOMATIC); } public Map getProbabilitiesByFilter() { return probabilitiesByFilter; } - - + public Map, Map> getProbabilitiesByFilterAndAllele() { return probabilitiesByFilterAndAllele; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilterMutectCalls.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilterMutectCalls.java index 1e3943973ea..4ce30432738 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilterMutectCalls.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilterMutectCalls.java @@ -120,8 +120,13 @@ public 
void onTraversalStart() { .collect(Collectors.toSet()); headerLines.add(new VCFHeaderLine(FILTERING_STATUS_VCF_KEY, "These calls have been filtered by " + FilterMutectCalls.class.getSimpleName() + " to label false positives with a list of failed filters and true positives with PASS.")); + // all possible filters, even allele specific (since they can apply to the site as well GATKVCFConstants.MUTECT_FILTER_NAMES.stream().map(GATKVCFHeaderLines::getFilterLine).forEach(headerLines::add); + // these are the possible allele specific filters which will be in the INFO section + // when all relevant alleles (non-symbolic, etc) are filtered, the filter will be applied to the site level filter also + GATKVCFConstants.MUTECT_AS_FILTER_NAMES.stream().map(GATKVCFHeaderLines::getInfoLine).forEach(headerLines::add); + headerLines.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java index 38919ac074a..5eb237105b7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java @@ -50,6 +50,8 @@ public void recordCall(final ErrorProbabilities errorProbabilities, final double filterFPs.get(entry.getKey()).add(filterArtifactProbability); } } + + //TODO add analysis for errorProbabilities.getProbabilitiesByFilterAndAllele(); } public void writeFilteringStats(final Path filteringStats, final double threshold, List> clusteringMetadata) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java 
new file mode 100644 index 00000000000..dbfbf9c28f6 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java @@ -0,0 +1,23 @@ +package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.hellbender.engine.ReferenceContext; + +import java.util.*; + +public abstract class HardAlleleFilter extends Mutect2AlleleFilter { + public void calculateErrorProbabilityForAlleles(LinkedHashMap results, final LinkedHashMap> dataByAllele, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + Map alleleArtifacts = areAllelesArtifacts(dataByAllele, vc, filteringEngine, referenceContext); + // only set values for alleles returned + alleleArtifacts.forEach((key, value) -> results.put(key, value ? 1.0 : 0.0)); + } + + public abstract Map areAllelesArtifacts(final LinkedHashMap> dataByAllele, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); + + // the posterior of a hard filter is 0 or 1, hence there's no reason to annotate it + @Override + public Optional phredScaledPosteriorAnnotationName() { + return Optional.empty(); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java index 561ffc2ab63..3c68a26d431 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java @@ -37,7 +37,7 @@ public class M2FiltersArgumentCollection { public double initialPosteriorThreshold = DEFAULT_INITIAL_POSTERIOR_THRESHOLD; /** - * Mitochondria mode 
includes the filter{@link ChimericOriginalAlignmentFilter} and {@link PolymorphicNuMTFilter}, + * Mitochondria mode includes the filter{@link ChimericOriginalAlignmentFilter} and {@link NuMTFilter}, * and excludes the filters {@link ClusteredEventsFilter}, {@link MultiallelicFilter}, {@link PolymeraseSlippageFilter}, * {@link FilteredHaplotypeFilter}, {@link FragmentLengthFilter}, and {@link GermlineFilter} */ @@ -103,10 +103,10 @@ public class M2FiltersArgumentCollection { @Argument(fullName = MIN_READS_ON_EACH_STRAND_LONG_NAME, optional = true, doc = "Minimum alt reads required on both forward and reverse strands") public int minReadsOnEachStrand = DEFAULT_MIN_READS_ON_EACH_STRAND; - @Argument(fullName = MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, optional = true, doc = "Median autosomal coverage for filtering potential polymporphic NuMTs when calling on mitochondria.") + @Argument(fullName = MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, optional = true, doc = "Median autosomal coverage for filtering potential NuMTs when calling on mitochondria.") public double medianAutosomalCoverage = DEFAULT_MEDIAN_AUTOSOMAL_COVERAGE; - @Argument(fullName = MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME, optional = true, doc = "Max expected NUMT copies in autosome used for filtering potential polymporphic NuMTs when calling on mitochondria.") + @Argument(fullName = MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME, optional = true, doc = "Max expected NUMT copies in autosome used for filtering potential NuMTs when calling on mitochondria.") public double maxNuMTAutosomalCopies = DEFAULT_MAX_NUMT_AUTOSOMAL_COPIES; @Argument(fullName = MAX_NUMT_FRACTION_LONG_NAME, doc="Maximum fraction of alt reads that originally aligned outside the mitochondria. 
These are due to NuMTs.", optional = true) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java new file mode 100644 index 00000000000..75cd0864529 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -0,0 +1,69 @@ +package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.hellbender.engine.ReferenceContext; + +import java.util.*; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +public abstract class Mutect2AlleleFilter extends Mutect2Filter { + + public Map applyFilter(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + // create and initialize a map with all the alleles in the vc as keys and new, empty lists as values + LinkedHashMap> dataByAllele = vc.getAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> new ArrayList<>(), (a, b) -> a, () -> new LinkedHashMap<>())); + + // pull all the allele specific data out of each genotype (that passes preconditions) and add it to the list for the allele + vc.getGenotypes().stream().filter(filteringEngine::isTumor) + .filter(checkPreconditions()) + .forEach(g -> { + Iterator alleleDataIterator = getData(g).iterator(); + Iterator> dataByAlleleIterator = dataByAllele.values().iterator(); + while(alleleDataIterator.hasNext() && dataByAlleleIterator.hasNext()) + dataByAlleleIterator.next().add(alleleDataIterator.next()); + + }); + + // construct output map with defaults + LinkedHashMap probabilityByAllele = 
vc.getAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> this.getDefaultProbability(), (a, b) -> null, () -> new LinkedHashMap<>())); + + // now invoke the filter giving it the map with the data separated by allele + calculateErrorProbabilityForAlleles(probabilityByAllele, dataByAllele, vc, filteringEngine, referenceContext); + return probabilityByAllele; + } + + /** + * Subclasses should override if they want a different default probability. + * Keep in mind that in the final output, NaN is used to determine when indicating that no probability was computed, and . will be the output for those alleles + * @return the default probability to use + */ + public Double getDefaultProbability() { + return Double.NaN; + } + + /** + * All subclass filters should implement if they need to verify the data needed exists in the genotype + * @return a predicate that will be applied to determine which genotypes will be part of the filter + */ + public abstract Predicate checkPreconditions(); + + /** + * All subclass filters should implement this method to return the necessary data needed to apply the filter + * @param g the genotype to pull the data from + * @return A list of per-allele data for each allele in the variant context (the data in the genotype is ordered by the alleles returned from vc.getAlleles() + */ + public abstract List getData(Genotype g); + + public Map errorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + return requiredAnnotations().stream().allMatch(vc::hasAttribute) ? 
+ applyFilter(vc, filteringEngine, referenceContext) : + // TODO make sure that somewhere the roundFinitePrecisionErrors is called when not a hard filter +// .entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, entry -> Mutect2FilteringEngine.roundFinitePrecisionErrors(entry.getValue()))) : + Collections.emptyMap(); + } + + protected abstract void calculateErrorProbabilityForAlleles(LinkedHashMap results, final LinkedHashMap> dataByAllele, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java new file mode 100644 index 00000000000..4a541cc82d7 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java @@ -0,0 +1,43 @@ +package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; + +import htsjdk.variant.variantcontext.VariantContext; +import org.apache.commons.lang3.tuple.ImmutablePair; + +import java.util.Comparator; +import java.util.List; +import java.util.Optional; + +public abstract class Mutect2Filter { + // by default do nothing, but we may override to allow some filters to learn their parameters in the first pass of {@link FilterMutectCalls} + protected void accumulateDataForLearning(final VariantContext vc, final ErrorProbabilities errorProbabilities, final Mutect2FilteringEngine filteringEngine) { } + protected void clearAccumulatedData() { } + protected void learnParameters() { } + protected void learnParametersAndClearAccumulatedData() { + learnParameters(); + clearAccumulatedData(); + } + + public abstract ErrorType errorType(); + public abstract String filterName(); + public abstract Optional phredScaledPosteriorAnnotationName(); + protected abstract List requiredAnnotations(); + + // weighted median -- what's the lowest 
posterior probability that accounts for samples with half of the total alt depth + protected static double weightedMedianPosteriorProbability(List> depthsAndPosteriors) { + final int totalAltDepth = depthsAndPosteriors.stream().mapToInt(ImmutablePair::getLeft).sum(); + + // sort from lowest to highest posterior probability of artifact + depthsAndPosteriors.sort(Comparator.comparingDouble(p -> p.getRight())); + + int cumulativeAltCount = 0; + + for (final ImmutablePair pair : depthsAndPosteriors) { + cumulativeAltCount += pair.getLeft(); + if (cumulativeAltCount * 2 >= totalAltDepth) { + return pair.getRight(); + } + } + return 0; + } + +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index d7006ef67db..8df29bb428d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; +import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; @@ -7,9 +8,11 @@ import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; import java.nio.file.Path; + import org.apache.commons.lang3.mutable.MutableDouble; import org.apache.commons.math3.util.MathArrays; import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.tools.walkers.annotator.StrandBiasTest; import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2Engine; import 
org.broadinstitute.hellbender.tools.walkers.mutect.MutectStats; @@ -29,6 +32,7 @@ public class Mutect2FilteringEngine { public static final double MIN_REPORTABLE_ERROR_PROBABILITY = 0.1; private final List filters = new ArrayList<>(); + private final List> alleleFilters = new ArrayList<>(); private final Set normalSamples; public static final List STANDARD_MUTECT_INFO_FIELDS_FOR_FILTERING = Arrays.asList( @@ -141,8 +145,9 @@ public void accumulateData(final VariantContext vc, final ReferenceContext refer return; } - final ErrorProbabilities errorProbabilities = new ErrorProbabilities(filters, vc, this, referenceContext); + final ErrorProbabilities errorProbabilities = new ErrorProbabilities(filters, alleleFilters, vc, this, referenceContext); filters.forEach(f -> f.accumulateDataForLearning(vc, errorProbabilities, this)); + alleleFilters.forEach(f -> f.accumulateDataForLearning(vc, errorProbabilities, this)); final int[] tumorADs = sumADsOverSamples(vc, true, false); final double[] tumorLogOdds = Mutect2FilteringEngine.getTumorLogOdds(vc); @@ -173,7 +178,7 @@ public void learnThreshold() { public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext vc, final ReferenceContext referenceContext) { final VariantContextBuilder vcb = new VariantContextBuilder(vc).filters(new HashSet<>()); - final ErrorProbabilities errorProbabilities = new ErrorProbabilities(filters, vc, this, referenceContext); + final ErrorProbabilities errorProbabilities = new ErrorProbabilities(filters, alleleFilters, vc, this, referenceContext); filteringOutputStats.recordCall(errorProbabilities, getThreshold() - EPSILON); final boolean variantFailsFilters = errorProbabilities.getErrorProbability() > Math.min(1 - EPSILON, Math.max(EPSILON, getThreshold())); @@ -195,9 +200,56 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext } } + // apply allele specific filters + List siteFilters = new ArrayList<>(); + List> ASFilters = + 
errorProbabilities.getProbabilitiesByFilterAndAllele().entrySet().stream().map( + entry -> addFilterStrings(entry.getValue(), siteFilters, entry.getKey().filterName())).collect(Collectors.toList()); + + siteFilters.forEach(vcb::filter); + List orderedASFilterStrings = vc.getAlleles().stream().map(allele -> getMergedFilterStringForAllele(allele, ASFilters)).collect(Collectors.toList()); + String finalAttrString = AnnotationUtils.encodeAnyASList(orderedASFilterStrings); + + vcb.putAttributes(Collections.singletonMap(GATKVCFConstants.AS_FILTER_STATUS_KEY, finalAttrString)); return vcb.make(); } + /** + * Creates a comma separated string of all the filters that apply to the allele. This is basically + * a pivot of the data. we have filterlist -> allele -> filterName. and we want allele -> list of filterName + * @param allele the allele to collect filters for + * @param alleleSpecificFilters all of the allele specific filters with the allele filter info + * @return encoded (comma separated) list of filters that apply to the allele + */ + private String getMergedFilterStringForAllele(Allele allele, List> alleleSpecificFilters) { + // loop through each filter and pull out the filters the specified allele + List results = alleleSpecificFilters.stream().map(m -> m.get(allele)).distinct().collect(Collectors.toList()); + if (results.size() > 1 && results.contains(VCFConstants.PASSES_FILTERS_v4)) { + results.remove(VCFConstants.PASSES_FILTERS_v4); + } else if (results.isEmpty()) { + results.add(VCFConstants.PASSES_FILTERS_v4); + } + return AnnotationUtils.encodeStringList(results); + } + + /** + * For each allele, determine whether the filter should be applied. 
also determine if the filter should apply to the site + * @param probabilities the probability computed by the filter for the allele + * @param siteFilters output value - filter name is added if it should apply to the site + * @param filterName the name of the filter used in the vcf + * @return map of alleles to the appropriate filter string + */ + private Map addFilterStrings(Map probabilities, List siteFilters, String filterName) { + Map results = probabilities.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, + entry -> entry.getValue().isNaN() ? VCFConstants.EMPTY_INFO_FIELD : entry.getValue() > Math.min(1 - EPSILON, Math.max(EPSILON, getThreshold())) ? + filterName : VCFConstants.PASSES_FILTERS_v4)); + List realFilters = results.values().stream().filter(x -> !x.equals(VCFConstants.EMPTY_INFO_FIELD)).collect(Collectors.toList()); + if (!realFilters.isEmpty() && realFilters.stream().allMatch(x -> x.equals(filterName))) { + siteFilters.add(filterName); + } + return results; + } + public static double roundFinitePrecisionErrors(final double probability) { return Math.max(Math.min(probability, 1.0), 0.0); } @@ -236,7 +288,7 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { if (MTFAC.mitochondria) { filters.add(new ChimericOriginalAlignmentFilter(MTFAC.maxNuMTFraction)); - filters.add(new PolymorphicNuMTFilter(MTFAC.maxNuMTAutosomalCopies)); + alleleFilters.add(new NuMTFilter(MTFAC.medianAutosomalCoverage, MTFAC.maxNuMTAutosomalCopies)); } else { filters.add(new ClusteredEventsFilter(MTFAC.maxEventsInRegion)); filters.add(new MultiallelicFilter(MTFAC.numAltAllelesThreshold)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java index 77c64024769..a1929e165e0 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java @@ -1,14 +1,9 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; import htsjdk.variant.variantcontext.VariantContext; -import org.apache.commons.lang3.tuple.ImmutablePair; import org.broadinstitute.hellbender.engine.ReferenceContext; -import java.util.Comparator; -import java.util.List; -import java.util.Optional; - -public abstract class Mutect2VariantFilter { +public abstract class Mutect2VariantFilter extends Mutect2Filter { public Mutect2VariantFilter() { } public double errorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { @@ -17,40 +12,4 @@ public double errorProbability(final VariantContext vc, final Mutect2FilteringEn } protected abstract double calculateErrorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); - - // by default do nothing, but we may override to allow some filters to learn their parameters in the first pass of {@link FilterMutectCalls} - protected void accumulateDataForLearning(final VariantContext vc, final ErrorProbabilities errorProbabilities, final Mutect2FilteringEngine filteringEngine) { } - protected void clearAccumulatedData() { } - protected void learnParameters() { } - protected void learnParametersAndClearAccumulatedData() { - learnParameters(); - clearAccumulatedData(); - } - - public abstract ErrorType errorType(); - - public abstract String filterName(); - - public abstract Optional phredScaledPosteriorAnnotationName(); - - protected abstract List requiredAnnotations(); - - // weighted median -- what's the lowest posterior probability that accounts for samples with half of the total alt depth - protected static double weightedMedianPosteriorProbability(List> 
depthsAndPosteriors) { - final int totalAltDepth = depthsAndPosteriors.stream().mapToInt(ImmutablePair::getLeft).sum(); - - // sort from lowest to highest posterior probability of artifact - depthsAndPosteriors.sort(Comparator.comparingDouble(p -> p.getRight())); - - int cumulativeAltCount = 0; - - for (final ImmutablePair pair : depthsAndPosteriors) { - cumulativeAltCount += pair.getLeft(); - if (cumulativeAltCount * 2 >= totalAltDepth) { - return pair.getRight(); - } - } - return 0; - } - } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java new file mode 100644 index 00000000000..1169b54aa92 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java @@ -0,0 +1,57 @@ +package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.VariantContext; +import org.apache.commons.math3.distribution.PoissonDistribution; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; + +import java.util.*; +import java.util.function.Predicate; +import java.util.stream.Collectors; + + +public class NuMTFilter extends HardAlleleFilter { + private static final double LOWER_BOUND_PROB = .01; + private final int maxAltDepthCutoff; + + public NuMTFilter(final double medianAutosomalCoverage, final double maxNuMTCopies){ + if (maxNuMTCopies > 0) { + final PoissonDistribution autosomalCoverage = new PoissonDistribution(medianAutosomalCoverage * maxNuMTCopies / 2.0); + maxAltDepthCutoff = autosomalCoverage.inverseCumulativeProbability(1 - LOWER_BOUND_PROB); + } else { + maxAltDepthCutoff = 0; + } + } + + @Override + public ErrorType errorType() { return ErrorType.NON_SOMATIC; } + + 
@Override + public Predicate checkPreconditions() { + return Genotype::hasAD; + } + + public List getData(Genotype g) { + return Arrays.stream(g.getAD()).boxed().collect(Collectors.toList()); + } + + @Override + public Map areAllelesArtifacts(final LinkedHashMap> dataByAllele, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + return dataByAllele.entrySet().stream() + .filter(entry -> !entry.getKey().isSymbolic() && !vc.getReference().equals(entry.getKey())) + .collect(Collectors.toMap( + Map.Entry::getKey, + entry -> entry.getValue().stream().max(Integer::compare).orElse(0) < maxAltDepthCutoff)); + } + + @Override + public String filterName() { + return GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME; + } + + @Override + protected List requiredAnnotations() { return Collections.emptyList(); } + +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymorphicNuMTFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymorphicNuMTFilter.java deleted file mode 100644 index 72f04e48c0b..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymorphicNuMTFilter.java +++ /dev/null @@ -1,51 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; - -import htsjdk.variant.variantcontext.Genotype; -import htsjdk.variant.variantcontext.VariantContext; -import org.apache.commons.lang.mutable.MutableBoolean; -import org.apache.commons.math3.distribution.PoissonDistribution; -import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; - -import java.util.Collections; -import java.util.List; -import java.util.OptionalInt; -import java.util.stream.IntStream; - -public class PolymorphicNuMTFilter extends HardFilter { - private static final double LOWER_BOUND_PROB = .01; -// private static final double MULTIPLE_COPIES_MULTIPLIER = 2.0; - private final int maxAltDepthCutoff; - - public 
PolymorphicNuMTFilter(final double maxNuMTCopies){ - if (maxNuMTCopies != 0) { - final PoissonDistribution autosomalCoverage = new PoissonDistribution(maxNuMTCopies); - maxAltDepthCutoff = autosomalCoverage.inverseCumulativeProbability(1 - LOWER_BOUND_PROB); - } else { - maxAltDepthCutoff = 0; - } - } - - @Override - public ErrorType errorType() { return ErrorType.NON_SOMATIC; } - - @Override - public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) { - return vc.getGenotypes().stream().filter(filteringEngine::isTumor) - .filter(Genotype::hasAD) - .anyMatch(g -> { - final int[] alleleDepths = g.getAD(); - final int numRealAlleles = vc.hasSymbolicAlleles() ? alleleDepths.length - 1 : alleleDepths.length; - //Start at first alternate allele depth (the ref allele is first) - final OptionalInt max = IntStream.range(1, numRealAlleles).map(a -> alleleDepths[a]).max(); - return max.getAsInt() < maxAltDepthCutoff; - }); - } - - @Override - public String filterName() { - return GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_FILTER_NAME; - } - - @Override - protected List requiredAnnotations() { return Collections.emptyList(); } -} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java index da679e4227d..89c68f72900 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java @@ -163,7 +163,7 @@ their names (or descriptions) depend on some threshold. 
Those filters are not i public final static String N_RATIO_FILTER_NAME = "n_ratio"; public final static String CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME = "numt_chimera"; //mitochondria public final static String ALLELE_FRACTION_FILTER_NAME = "low_allele_frac"; - public static final String POTENTIAL_POLYMORPHIC_NUMT_FILTER_NAME = "possible_numt"; + public static final String POSSIBLE_NUMT_FILTER_NAME = "possible_numt"; public static final List MUTECT_FILTER_NAMES = Arrays.asList(VCFConstants.PASSES_FILTERS_v4, POLYMERASE_SLIPPAGE, PON_FILTER_NAME, CLUSTERED_EVENTS_FILTER_NAME, TUMOR_EVIDENCE_FILTER_NAME, GERMLINE_RISK_FILTER_NAME, @@ -172,7 +172,9 @@ their names (or descriptions) depend on some threshold. Those filters are not i MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME, READ_POSITION_FILTER_NAME, CONTAMINATION_FILTER_NAME, DUPLICATED_EVIDENCE_FILTER_NAME, READ_ORIENTATION_ARTIFACT_FILTER_NAME, BAD_HAPLOTYPE_FILTER_NAME, CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME, - STRICT_STRAND_BIAS_FILTER_NAME, N_RATIO_FILTER_NAME, ALLELE_FRACTION_FILTER_NAME, POTENTIAL_POLYMORPHIC_NUMT_FILTER_NAME); + STRICT_STRAND_BIAS_FILTER_NAME, N_RATIO_FILTER_NAME, ALLELE_FRACTION_FILTER_NAME, POSSIBLE_NUMT_FILTER_NAME); + + public static final List MUTECT_AS_FILTER_NAMES = Arrays.asList(AS_FILTER_STATUS_KEY); // Symbolic alleles public final static String SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG = "ALT"; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java index fd96072e5ae..5874f1a64d4 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java @@ -102,7 +102,7 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf //Mitochondrial M2-related filters addFilterLine(new 
VCFFilterHeaderLine(CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME, "NuMT variant with too many ALT reads originally from autosome")); - addFilterLine(new VCFFilterHeaderLine(POTENTIAL_POLYMORPHIC_NUMT_FILTER_NAME, "Alt depth is below expected coverage of NuMT in autosome")); + addFilterLine(new VCFFilterHeaderLine(POSSIBLE_NUMT_FILTER_NAME, "Allele depth is below expected coverage of NuMT in autosome")); addFormatLine(new VCFFormatHeaderLine(ALLELE_BALANCE_KEY, 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); addFormatLine(new VCFFormatHeaderLine(MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY, 1, VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index bceeacd86bc..0b0d58df60c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -6,6 +6,7 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFConstants; import htsjdk.variant.vcf.VCFHeader; +import org.apache.commons.collections4.SetUtils; import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.Main; @@ -513,24 +514,26 @@ public Object[][] vcfsForFiltering() { Collections.emptySet(), ImmutableSet.of(GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), ImmutableSet.of( GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, - GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_FILTER_NAME, + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME), Collections.emptySet(), Collections.emptySet(), - Collections.emptySet())}, + Collections.emptySet()), + Arrays.asList(".|PASS", ".|PASS", ".|possible_numt", 
".|PASS|possible_numt|possible_numt", ".|PASS", ".|PASS")}, {NA12878_MITO_GVCF, .0009, 0.5, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList( Collections.emptySet(), ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME), - ImmutableSet.of(GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME), + ImmutableSet.of(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME), Collections.emptySet(), ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.CONTAMINATION_FILTER_NAME, - GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_FILTER_NAME, - GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME))} + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, + GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME)), + Arrays.asList(".|.", ".|PASS|.", ".|possible_numt|.", ".|PASS|PASS|.", ".|possible_numt|.")} }; } @Test(dataProvider = "vcfsForFiltering") - public void testFilterMitochondria(File unfiltered, final double minAlleleFraction, final double autosomalCoverage, final List intervals, List> expectedFilters) { + public void testFilterMitochondria(File unfiltered, final double minAlleleFraction, final double autosomalCoverage, final List intervals, List> expectedFilters, List expectedASFilters) { final File filteredVcf = createTempFile("filtered", ".vcf"); // vcf sequence dicts don't match ref @@ -539,6 +542,7 @@ public void testFilterMitochondria(File unfiltered, final double minAlleleFracti args -> args.add(StandardArgumentDefinitions.DISABLE_SEQUENCE_DICT_VALIDATION_NAME, true), args -> 
args.add(M2FiltersArgumentCollection.MIN_AF_LONG_NAME, minAlleleFraction), args -> args.add(M2FiltersArgumentCollection.MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, autosomalCoverage), + args -> args.add(M2FiltersArgumentCollection.MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME, 4.0), args -> { intervals.stream().map(SimpleInterval::new).forEach(args::addInterval); return args; @@ -547,10 +551,14 @@ public void testFilterMitochondria(File unfiltered, final double minAlleleFracti final List> actualFilters = VariantContextTestUtils.streamVcf(filteredVcf) .map(VariantContext::getFilters).collect(Collectors.toList()); + final List actualASFilters = VariantContextTestUtils.streamVcf(filteredVcf) + .map(vc -> vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, "")).collect(Collectors.toList()); + Assert.assertEquals(expectedASFilters, actualASFilters); + Assert.assertEquals(expectedFilters.size(), actualFilters.size()); for (int n = 0; n < actualFilters.size(); n++) { - Assert.assertTrue(actualFilters.get(n).containsAll(expectedFilters.get(n))); - Assert.assertTrue(expectedFilters.get(n).containsAll(actualFilters.get(n))); + Assert.assertTrue(actualFilters.get(n).containsAll(expectedFilters.get(n)), "Actual filters missing some expected filters: " + SetUtils.difference(expectedFilters.get(n), actualFilters.get(n))); + Assert.assertTrue(expectedFilters.get(n).containsAll(actualFilters.get(n)), "Expected filters missing some actual filters: " + SetUtils.difference(actualFilters.get(n), expectedFilters.get(n))); } Assert.assertEquals(expectedFilters, actualFilters); From e94059e295619ad79ed214a09f527496cd68d3d9 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 5 Dec 2019 11:17:34 -0500 Subject: [PATCH 03/85] wip --- .../tools/walkers/mutect/filtering/ErrorProbabilities.java | 7 ++++--- .../tools/walkers/mutect/Mutect2IntegrationTest.java | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index 23461568caa..3c557035bca 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -8,6 +8,7 @@ import java.util.EnumMap; import java.util.List; import java.util.Map; +import java.util.function.DoubleSupplier; import java.util.function.Function; import java.util.stream.Collectors; @@ -23,9 +24,9 @@ public ErrorProbabilities(final List variantFilters, final probabilitiesByFilterAndAllele = alleleFilters.stream().collect(Collectors.toMap(Function.identity(), f -> f.errorProbability(vc, filteringEngine, referenceContext))); probabilitiesByType = Arrays.stream(ErrorType.values()).collect(Collectors.toMap(v -> v, v -> 0.0, (a,b) -> a, () -> new EnumMap<>(ErrorType.class))); variantFilters.forEach(f -> probabilitiesByType.compute(f.errorType(), (type,prob) -> Math.max(prob, probabilitiesByFilter.get(f)))); - alleleFilters.forEach(f -> probabilitiesByType.compute(f.errorType(), (type,prob) -> - Math.max(prob, - probabilitiesByFilterAndAllele.get(f).values().stream().max(Double::compare).get()))); +// alleleFilters.forEach(f -> probabilitiesByType.compute(f.errorType(), (type,prob) -> +// Math.max(prob, +// probabilitiesByFilterAndAllele.get(f).values().stream().filter(d -> !d.isNaN()).max(Double::compare).orElseGet(() -> 0.0)))); // treat errors of different types as independent double trueProbability = 1; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 0b0d58df60c..f023a1d6070 100644 --- 
a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -519,7 +519,7 @@ public Object[][] vcfsForFiltering() { Collections.emptySet(), Collections.emptySet(), Collections.emptySet()), - Arrays.asList(".|PASS", ".|PASS", ".|possible_numt", ".|PASS|possible_numt|possible_numt", ".|PASS", ".|PASS")}, + Arrays.asList( ".|PASS", ".|PASS", ".|" + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, ".|PASS|" + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME + "|" + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, ".|PASS", ".|PASS")}, {NA12878_MITO_GVCF, .0009, 0.5, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList( Collections.emptySet(), ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME), @@ -528,7 +528,7 @@ public Object[][] vcfsForFiltering() { ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.CONTAMINATION_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME)), - Arrays.asList(".|.", ".|PASS|.", ".|possible_numt|.", ".|PASS|PASS|.", ".|possible_numt|.")} + Arrays.asList(".|.", ".|PASS|.", ".|" + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME + ".", ".|PASS|PASS|.", ".|" + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME + "|.")} }; } From 22345ad2607e53015a919e88a418375ba9477f17 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 6 Dec 2019 13:15:39 -0500 Subject: [PATCH 04/85] converted base qual and tumor --- .../walkers/annotator/AnnotationUtils.java | 2 +- .../mutect/filtering/BaseQualityFilter.java | 19 +++-- .../mutect/filtering/ErrorProbabilities.java | 11 +-- .../mutect/filtering/HardAlleleFilter.java | 10 ++- 
.../mutect/filtering/Mutect2AlleleFilter.java | 85 ++++++++++++------- .../filtering/Mutect2FilteringEngine.java | 32 ++++--- .../walkers/mutect/filtering/NuMTFilter.java | 9 +- .../mutect/filtering/TumorEvidenceFilter.java | 20 +++-- .../mutect/Mutect2IntegrationTest.java | 29 +++++-- 9 files changed, 128 insertions(+), 89 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java index aac6a88e762..3858a248dd5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java @@ -50,7 +50,7 @@ public static String encodeAnyASList( final List somethingList) { * @return a list of allele-specific annotation entries */ public static List decodeAnyASList( final String somethingList) { - return Arrays.asList(StringUtils.splitByWholeSeparatorPreserveAllTokens(somethingList.replaceAll(BRACKET_REGEX, ""), ALLELE_SPECIFIC_REDUCED_DELIM)); + return Arrays.asList(StringUtils.splitByWholeSeparatorPreserveAllTokens(somethingList.replaceAll(BRACKET_REGEX, ""), ALLELE_SPECIFIC_PRINT_DELIM)); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java index e35bc55a60c..69a5a38cb5f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java @@ -1,13 +1,21 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; +import 
org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import java.util.Arrays; import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; +import java.util.function.Predicate; +import java.util.stream.Collectors; -public class BaseQualityFilter extends HardFilter { +public class BaseQualityFilter extends HardAlleleFilter { private final double minMedianBaseQuality; public BaseQualityFilter(final double minMedianBaseQuality) { @@ -18,12 +26,13 @@ public BaseQualityFilter(final double minMedianBaseQuality) { public ErrorType errorType() { return ErrorType.ARTIFACT; } @Override - public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) { - final List baseQualityByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_BASE_QUALITY_KEY, 0); + public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + List baseQualityByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_BASE_QUALITY_KEY, 0); + baseQualityByAllele.remove(0); // get rid of ref final double[] tumorLods = Mutect2FilteringEngine.getTumorLogOdds(vc); - final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods); - return baseQualityByAllele.get(indexOfMaxTumorLod + 1) < minMedianBaseQuality; + return baseQualityByAllele.stream().map(qual -> qual < minMedianBaseQuality).collect(Collectors.toList()); + } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index 3c557035bca..a1268a79eb8 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -4,24 +4,21 @@ import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; -import java.util.Arrays; -import java.util.EnumMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.function.DoubleSupplier; import java.util.function.Function; import java.util.stream.Collectors; public final class ErrorProbabilities { private final Map probabilitiesByFilter; - private final Map, Map> probabilitiesByFilterAndAllele; + private final LinkedHashMap, List> probabilitiesByFilterAndAllele; private final EnumMap probabilitiesByType; private final double errorProbability; public ErrorProbabilities(final List variantFilters, final List> alleleFilters, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, final ReferenceContext referenceContext) { probabilitiesByFilter = variantFilters.stream().collect(Collectors.toMap(Function.identity(), f -> f.errorProbability(vc, filteringEngine, referenceContext))); - probabilitiesByFilterAndAllele = alleleFilters.stream().collect(Collectors.toMap(Function.identity(), f -> f.errorProbability(vc, filteringEngine, referenceContext))); + probabilitiesByFilterAndAllele = alleleFilters.stream().collect(Collectors.toMap(Function.identity(), f -> f.errorProbability(vc, filteringEngine, referenceContext), (a,b) -> a, () -> new LinkedHashMap<>())); probabilitiesByType = Arrays.stream(ErrorType.values()).collect(Collectors.toMap(v -> v, v -> 0.0, (a,b) -> a, () -> new EnumMap<>(ErrorType.class))); variantFilters.forEach(f -> probabilitiesByType.compute(f.errorType(), (type,prob) -> Math.max(prob, probabilitiesByFilter.get(f)))); // alleleFilters.forEach(f -> probabilitiesByType.compute(f.errorType(), (type,prob) -> @@ -41,5 +38,5 @@ public 
ErrorProbabilities(final List variantFilters, final public double getTechnicalArtifactProbability() { return probabilitiesByType.get(ErrorType.ARTIFACT); } public double getNonSomaticProbability() { return probabilitiesByType.get(ErrorType.NON_SOMATIC); } public Map getProbabilitiesByFilter() { return probabilitiesByFilter; } - public Map, Map> getProbabilitiesByFilterAndAllele() { return probabilitiesByFilterAndAllele; } + public Map, List> getProbabilitiesByFilterAndAllele() { return probabilitiesByFilterAndAllele; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java index dbfbf9c28f6..8e39f616f20 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java @@ -5,15 +5,17 @@ import org.broadinstitute.hellbender.engine.ReferenceContext; import java.util.*; +import java.util.stream.Collectors; public abstract class HardAlleleFilter extends Mutect2AlleleFilter { - public void calculateErrorProbabilityForAlleles(LinkedHashMap results, final LinkedHashMap> dataByAllele, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - Map alleleArtifacts = areAllelesArtifacts(dataByAllele, vc, filteringEngine, referenceContext); + @Override + public List calculateErrorProbabilityForAlleles(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + List alleleArtifacts = areAllelesArtifacts(vc, filteringEngine, referenceContext); // only set values for alleles returned - alleleArtifacts.forEach((key, value) -> results.put(key, value ? 1.0 : 0.0)); + return alleleArtifacts.stream().map(value -> value ? 
1.0 : 0.0).collect(Collectors.toList()); } - public abstract Map areAllelesArtifacts(final LinkedHashMap> dataByAllele, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); + public abstract List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); // the posterior of a hard filter is 0 or 1, hence there's no reason to annotate it @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index 75cd0864529..35c02b84c6a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -4,6 +4,7 @@ import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.utils.IndexRange; import java.util.*; import java.util.function.Function; @@ -12,58 +13,76 @@ public abstract class Mutect2AlleleFilter extends Mutect2Filter { - public Map applyFilter(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + + public LinkedHashMap> getDataByAllele(final VariantContext vc, Predicate preconditions, Function> getData, final Mutect2FilteringEngine filteringEngine) { // create and initialize a map with all the alleles in the vc as keys and new, empty lists as values LinkedHashMap> dataByAllele = vc.getAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> new ArrayList<>(), (a, b) -> a, () -> new LinkedHashMap<>())); // pull all the allele specific data out of each genotype (that passes preconditions) and add it to the list for the 
allele - vc.getGenotypes().stream().filter(filteringEngine::isTumor) - .filter(checkPreconditions()) + vc.getGenotypes().stream().filter(preconditions).filter(filteringEngine::isTumor) .forEach(g -> { - Iterator alleleDataIterator = getData(g).iterator(); + Iterator alleleDataIterator = getData.apply(g).iterator(); Iterator> dataByAlleleIterator = dataByAllele.values().iterator(); while(alleleDataIterator.hasNext() && dataByAlleleIterator.hasNext()) dataByAlleleIterator.next().add(alleleDataIterator.next()); }); - // construct output map with defaults - LinkedHashMap probabilityByAllele = vc.getAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> this.getDefaultProbability(), (a, b) -> null, () -> new LinkedHashMap<>())); - - // now invoke the filter giving it the map with the data separated by allele - calculateErrorProbabilityForAlleles(probabilityByAllele, dataByAllele, vc, filteringEngine, referenceContext); - return probabilityByAllele; + return dataByAllele; } - /** - * Subclasses should override if they want a different default probability. - * Keep in mind that in the final output, NaN is used to determine when indicating that no probability was computed, and . 
will be the output for those alleles - * @return the default probability to use - */ - public Double getDefaultProbability() { - return Double.NaN; - } +// public List applyFilter(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { +// // create and initialize a map with all the alleles in the vc as keys and new, empty lists as values +// LinkedHashMap> dataByAllele = vc.getAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> new ArrayList<>(), (a, b) -> a, () -> new LinkedHashMap<>())); +// +// // pull all the allele specific data out of each genotype (that passes preconditions) and add it to the list for the allele +// vc.getGenotypes().stream().filter(filteringEngine::isTumor) +// .filter(checkPreconditions()) +// .forEach(g -> { +// Iterator alleleDataIterator = getData(g).iterator(); +// Iterator> dataByAlleleIterator = dataByAllele.values().iterator(); +// while(alleleDataIterator.hasNext() && dataByAlleleIterator.hasNext()) +// dataByAlleleIterator.next().add(alleleDataIterator.next()); +// +// }); +// +// // construct output map with defaults +//// LinkedHashMap probabilityByAllele = vc.getAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> this.getDefaultProbability(), (a, b) -> null, () -> new LinkedHashMap<>())); +// +// // now invoke the filter giving it the map with the data separated by allele +// List probabilityByAllele = calculateErrorProbabilityForAlleles(dataByAllele, vc, filteringEngine, referenceContext); +// return probabilityByAllele; +// } - /** - * All subclass filters should implement if they need to verify the data needed exists in the genotype - * @return a predicate that will be applied to determine which genotypes will be part of the filter - */ - public abstract Predicate checkPreconditions(); +// /** +// * Subclasses should override if they want a different default probability. 
+// * Keep in mind that in the final output, NaN is used to determine when indicating that no probability was computed, and . will be the output for those alleles +// * @return the default probability to use +// */ +// public Double getDefaultProbability() { +// return Double.NaN; +// } +// +// /** +// * All subclass filters should implement if they need to verify the data needed exists in the genotype +// * @return a predicate that will be applied to determine which genotypes will be part of the filter +// */ +// public abstract Predicate checkPreconditions(); - /** - * All subclass filters should implement this method to return the necessary data needed to apply the filter - * @param g the genotype to pull the data from - * @return A list of per-allele data for each allele in the variant context (the data in the genotype is ordered by the alleles returned from vc.getAlleles() - */ - public abstract List getData(Genotype g); +// /** +// * All subclass filters should implement this method to return the necessary data needed to apply the filter +// * @param g the genotype to pull the data from +// * @return A list of per-allele data for each allele in the variant context (the data in the genotype is ordered by the alleles returned from vc.getAlleles() +// */ +// public abstract List getData(Genotype g); - public Map errorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + public List errorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { return requiredAnnotations().stream().allMatch(vc::hasAttribute) ? 
- applyFilter(vc, filteringEngine, referenceContext) : + calculateErrorProbabilityForAlleles(vc, filteringEngine, referenceContext) : // TODO make sure that somewhere the roundFinitePrecisionErrors is called when not a hard filter // .entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, entry -> Mutect2FilteringEngine.roundFinitePrecisionErrors(entry.getValue()))) : - Collections.emptyMap(); + Collections.emptyList(); } - protected abstract void calculateErrorProbabilityForAlleles(LinkedHashMap results, final LinkedHashMap> dataByAllele, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); + protected abstract List calculateErrorProbabilityForAlleles(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 8df29bb428d..a28ea04200e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -1,6 +1,5 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; -import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; @@ -202,12 +201,13 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext // apply allele specific filters List siteFilters = new ArrayList<>(); - List> ASFilters = - errorProbabilities.getProbabilitiesByFilterAndAllele().entrySet().stream().map( + List> ASFilters = + errorProbabilities.getProbabilitiesByFilterAndAllele().entrySet().stream().filter(entry -> 
!entry.getValue().isEmpty()).map( entry -> addFilterStrings(entry.getValue(), siteFilters, entry.getKey().filterName())).collect(Collectors.toList()); siteFilters.forEach(vcb::filter); - List orderedASFilterStrings = vc.getAlleles().stream().map(allele -> getMergedFilterStringForAllele(allele, ASFilters)).collect(Collectors.toList()); + List orderedASFilterStrings = vc.getAlleles().stream().map(allele -> allele.isReference() || allele.isSymbolic() ? + VCFConstants.EMPTY_INFO_FIELD : getMergedFilterStringForAllele(ASFilters)).collect(Collectors.toList()); String finalAttrString = AnnotationUtils.encodeAnyASList(orderedASFilterStrings); vcb.putAttributes(Collections.singletonMap(GATKVCFConstants.AS_FILTER_STATUS_KEY, finalAttrString)); @@ -217,13 +217,12 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext /** * Creates a comma separated string of all the filters that apply to the allele. This is basically * a pivot of the data. we have filterlist -> allele -> filterName. 
and we want allele -> list of filterName - * @param allele the allele to collect filters for * @param alleleSpecificFilters all of the allele specific filters with the allele filter info * @return encoded (comma separated) list of filters that apply to the allele */ - private String getMergedFilterStringForAllele(Allele allele, List> alleleSpecificFilters) { + private String getMergedFilterStringForAllele(List> alleleSpecificFilters) { // loop through each filter and pull out the filters the specified allele - List results = alleleSpecificFilters.stream().map(m -> m.get(allele)).distinct().collect(Collectors.toList()); + List results = alleleSpecificFilters.stream().map(alleleValuesIterator -> alleleValuesIterator.next()).distinct().collect(Collectors.toList()); if (results.size() > 1 && results.contains(VCFConstants.PASSES_FILTERS_v4)) { results.remove(VCFConstants.PASSES_FILTERS_v4); } else if (results.isEmpty()) { @@ -237,17 +236,16 @@ private String getMergedFilterStringForAllele(Allele allele, List addFilterStrings(Map probabilities, List siteFilters, String filterName) { - Map results = probabilities.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, - entry -> entry.getValue().isNaN() ? VCFConstants.EMPTY_INFO_FIELD : entry.getValue() > Math.min(1 - EPSILON, Math.max(EPSILON, getThreshold())) ? - filterName : VCFConstants.PASSES_FILTERS_v4)); - List realFilters = results.values().stream().filter(x -> !x.equals(VCFConstants.EMPTY_INFO_FIELD)).collect(Collectors.toList()); - if (!realFilters.isEmpty() && realFilters.stream().allMatch(x -> x.equals(filterName))) { + private Iterator addFilterStrings(List probabilities, List siteFilters, String filterName) { + List results = probabilities.stream().map(value -> value > Math.min(1 - EPSILON, Math.max(EPSILON, getThreshold())) ? 
+ filterName : VCFConstants.PASSES_FILTERS_v4).collect(Collectors.toList()); +// List realFilters = results.stream().filter(x -> !x.equals(VCFConstants.EMPTY_INFO_FIELD)).collect(Collectors.toList()); + if (!results.isEmpty() && results.stream().allMatch(x -> x.equals(filterName))) { siteFilters.add(filterName); } - return results; + return results.iterator(); } public static double roundFinitePrecisionErrors(final double probability) { @@ -263,8 +261,8 @@ public void writeFilteringStats(final Path filteringStats) { } private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { - filters.add(new TumorEvidenceFilter()); - filters.add(new BaseQualityFilter(MTFAC.minMedianBaseQuality)); + alleleFilters.add(new TumorEvidenceFilter()); + alleleFilters.add(new BaseQualityFilter(MTFAC.minMedianBaseQuality)); filters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); filters.add(new StrandArtifactFilter()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java index 1169b54aa92..ee043dd4766 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java @@ -3,6 +3,7 @@ import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; +import org.apache.commons.collections.functors.AndPredicate; import org.apache.commons.math3.distribution.PoissonDistribution; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; @@ -28,7 +29,6 @@ public NuMTFilter(final double medianAutosomalCoverage, final double maxNuMTCopi @Override public 
ErrorType errorType() { return ErrorType.NON_SOMATIC; } - @Override public Predicate checkPreconditions() { return Genotype::hasAD; } @@ -38,12 +38,11 @@ public List getData(Genotype g) { } @Override - public Map areAllelesArtifacts(final LinkedHashMap> dataByAllele, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + LinkedHashMap> dataByAllele = getDataByAllele(vc, checkPreconditions(), this::getData, filteringEngine); return dataByAllele.entrySet().stream() .filter(entry -> !entry.getKey().isSymbolic() && !vc.getReference().equals(entry.getKey())) - .collect(Collectors.toMap( - Map.Entry::getKey, - entry -> entry.getValue().stream().max(Integer::compare).orElse(0) < maxAltDepthCutoff)); + .map(entry -> entry.getValue().stream().max(Integer::compare).orElse(0) < maxAltDepthCutoff).collect(Collectors.toList()); } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java index 11f318887ed..b3d3ecffb81 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java @@ -4,27 +4,29 @@ import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.mutect.clustering.Datum; import org.broadinstitute.hellbender.tools.walkers.mutect.clustering.SomaticClusteringModel; +import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; -import java.util.Collections; -import java.util.List; -import 
java.util.Optional; +import java.util.*; -public class TumorEvidenceFilter extends Mutect2VariantFilter { +public class TumorEvidenceFilter extends Mutect2AlleleFilter { @Override public ErrorType errorType() { return ErrorType.SEQUENCING; } @Override - public double calculateErrorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + protected List calculateErrorProbabilityForAlleles(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) + { final double[] tumorLods = Mutect2FilteringEngine.getTumorLogOdds(vc); final int[] ADs = filteringEngine.sumADsOverSamples(vc, true, false); - final int maxIndex = MathUtils.maxElementIndex(tumorLods); - final int altCount = ADs[maxIndex + 1]; final int totalCount = (int) MathUtils.sum(ADs); + SomaticClusteringModel model = filteringEngine.getSomaticClusteringModel(); - return filteringEngine.getSomaticClusteringModel() - .probabilityOfSequencingError(new Datum(tumorLods[maxIndex], 0, 0, altCount, totalCount, SomaticClusteringModel.indelLength(vc, maxIndex))); + List altResults = new ArrayList<>(); + new IndexRange(0, tumorLods.length).forEach(i -> + altResults.add(model.probabilityOfSequencingError(new Datum(tumorLods[i], 0, 0, ADs[i+1], totalCount, SomaticClusteringModel.indelLength(vc, i))))); + + return altResults; } @Override diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index f023a1d6070..26c699a4bec 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -519,7 +519,14 @@ public Object[][] vcfsForFiltering() { Collections.emptySet(), Collections.emptySet(), Collections.emptySet()), - Arrays.asList( 
".|PASS", ".|PASS", ".|" + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, ".|PASS|" + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME + "|" + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, ".|PASS", ".|PASS")}, + Arrays.asList( + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4 , GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, possible_numt|possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4) // .|PASS + )}, {NA12878_MITO_GVCF, .0009, 0.5, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList( Collections.emptySet(), ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME), @@ -528,12 +535,18 @@ public Object[][] vcfsForFiltering() { ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.CONTAMINATION_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME)), - Arrays.asList(".|.", ".|PASS|.", ".|" + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME + ".", ".|PASS|PASS|.", ".|" + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME + "|.")} + Arrays.asList( + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.EMPTY_INFO_FIELD), //".|.", + 
Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual|.", + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // ".|weak_evidence, possible_numt|.", + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|PASS|weak_evidence, base_qual|.", + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // ".|weak_evidence, base_qual, possible_numt|." + )} }; } @Test(dataProvider = "vcfsForFiltering") - public void testFilterMitochondria(File unfiltered, final double minAlleleFraction, final double autosomalCoverage, final List intervals, List> expectedFilters, List expectedASFilters) { + public void testFilterMitochondria(File unfiltered, final double minAlleleFraction, final double autosomalCoverage, final List intervals, List> expectedFilters, List> expectedASFilters) { final File filteredVcf = createTempFile("filtered", ".vcf"); // vcf sequence dicts don't match ref @@ -551,17 +564,17 @@ public void testFilterMitochondria(File unfiltered, final double minAlleleFracti final List> actualFilters = VariantContextTestUtils.streamVcf(filteredVcf) .map(VariantContext::getFilters).collect(Collectors.toList()); - final List actualASFilters = VariantContextTestUtils.streamVcf(filteredVcf) - .map(vc -> vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, "")).collect(Collectors.toList()); - Assert.assertEquals(expectedASFilters, actualASFilters); + 
final List> actualASFilters = VariantContextTestUtils.streamVcf(filteredVcf) + .map(vc -> AnnotationUtils.decodeAnyASList(vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, ""))).collect(Collectors.toList()); + Assert.assertEquals(actualASFilters, expectedASFilters); - Assert.assertEquals(expectedFilters.size(), actualFilters.size()); + Assert.assertEquals(actualFilters.size(), expectedFilters.size()); for (int n = 0; n < actualFilters.size(); n++) { Assert.assertTrue(actualFilters.get(n).containsAll(expectedFilters.get(n)), "Actual filters missing some expected filters: " + SetUtils.difference(expectedFilters.get(n), actualFilters.get(n))); Assert.assertTrue(expectedFilters.get(n).containsAll(actualFilters.get(n)), "Expected filters missing some actual filters: " + SetUtils.difference(actualFilters.get(n), expectedFilters.get(n))); } - Assert.assertEquals(expectedFilters, actualFilters); + Assert.assertEquals(actualFilters, expectedFilters); } @Test From 32d4ed25a7dc10690da23694125a90bd41381ef9 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 6 Dec 2019 14:18:38 -0500 Subject: [PATCH 05/85] converted mapping qual filter --- .../mutect/filtering/BaseQualityFilter.java | 3 --- .../filtering/MappingQualityFilter.java | 22 +++++++++++++------ .../filtering/Mutect2FilteringEngine.java | 2 +- .../mutect/Mutect2IntegrationTest.java | 4 ++-- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java index 69a5a38cb5f..1a58595952a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java @@ -29,10 +29,7 @@ public BaseQualityFilter(final double minMedianBaseQuality) { public List 
areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { List baseQualityByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_BASE_QUALITY_KEY, 0); baseQualityByAllele.remove(0); // get rid of ref - final double[] tumorLods = Mutect2FilteringEngine.getTumorLogOdds(vc); - return baseQualityByAllele.stream().map(qual -> qual < minMedianBaseQuality).collect(Collectors.toList()); - } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java index 8d90e00fee4..6560a8783cd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java @@ -1,13 +1,16 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; import htsjdk.variant.variantcontext.VariantContext; -import org.broadinstitute.hellbender.tools.walkers.annotator.MappingQuality; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; -public class MappingQualityFilter extends HardFilter { +// TODO this class doens't use the generic method - how to simplify? 
+public class MappingQualityFilter extends HardAlleleFilter { private final double minMedianMappingQuality; private final int longIndelSize; @@ -20,15 +23,20 @@ public MappingQualityFilter(final double minMedianMappingQuality, final int long public ErrorType errorType() { return ErrorType.ARTIFACT; } @Override - public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) { - final List indelLengths = vc.getIndelLengths(); - final int indelLength = indelLengths == null ? 0 : indelLengths.stream().mapToInt(Math::abs).max().orElseGet(() -> 0); - final List mappingQualityByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY, 0); + public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + final List indelLengths = vc.getIndelLengths(); // alts only + List mappingQualityByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY, 0); // we use the mapping quality annotation of the alt allele in most cases, but for long indels we use the reference // annotation. We have to do this because the indel, even if it maps uniquely, gets a poor mapping quality // by virtue of its mismatch. The reference mapping quality is a decent proxy for the region's mappability. - return mappingQualityByAllele.get(indelLength < longIndelSize ? 
1 : 0) < minMedianMappingQuality; + int refQual = mappingQualityByAllele.remove(0); // get the ref value and convert list to alts only + new IndexRange(0, mappingQualityByAllele.size()-1).forEach(i -> { + if (indelLengths != null && indelLengths.get(i) >= longIndelSize) { + mappingQualityByAllele.set(i, refQual); + } + }); + return mappingQualityByAllele.stream().map(qual -> qual < minMedianMappingQuality).collect(Collectors.toList()); } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index a28ea04200e..8264237b4bf 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -263,7 +263,7 @@ public void writeFilteringStats(final Path filteringStats) { private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { alleleFilters.add(new TumorEvidenceFilter()); alleleFilters.add(new BaseQualityFilter(MTFAC.minMedianBaseQuality)); - filters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); + alleleFilters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); filters.add(new StrandArtifactFilter()); filters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 26c699a4bec..ef65e915b2c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -539,8 +539,8 @@ public Object[][] vcfsForFiltering() { Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.EMPTY_INFO_FIELD), //".|.", Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual|.", Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // ".|weak_evidence, possible_numt|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|PASS|weak_evidence, base_qual|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // ".|weak_evidence, base_qual, possible_numt|." + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|PASS|weak_evidence, base_qual|.", + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // ".|weak_evidence, base_qual, map_qual, possible_numt|." 
)} }; } From 1d64edf75c0c927227b345e0f6eeaa78f0cc845d Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 6 Dec 2019 15:17:22 -0500 Subject: [PATCH 06/85] updated duplicate alt read filter - but need to add tests --- .../mutect/filtering/DuplicatedAltReadFilter.java | 12 ++++++++---- .../mutect/filtering/Mutect2FilteringEngine.java | 10 +++++++++- .../tools/walkers/mutect/Mutect2IntegrationTest.java | 1 + 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java index b02839e32d4..935fba94182 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java @@ -1,15 +1,19 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.annotator.UniqueAltReadCount; +import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; // This filter checks for the case in which PCR-duplicates with unique UMIs (which we assume is caused by false adapter priming) // amplify the erroneous signal for an alternate allele. 
-public class DuplicatedAltReadFilter extends HardFilter { +public class DuplicatedAltReadFilter extends HardAlleleFilter { private final int uniqueAltReadCount; public DuplicatedAltReadFilter(final int uniqueAltReadCount) { @@ -20,11 +24,11 @@ public DuplicatedAltReadFilter(final int uniqueAltReadCount) { public ErrorType errorType() { return ErrorType.ARTIFACT; } @Override - public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) { - return vc.getAttributeAsInt(UniqueAltReadCount.KEY, 1) <= uniqueAltReadCount; + public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + return Collections.singletonList(vc.getAttributeAsInt(UniqueAltReadCount.KEY, 1) <= uniqueAltReadCount); } - @Override + @Override public String filterName() { return GATKVCFConstants.DUPLICATED_EVIDENCE_FILTER_NAME; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 8264237b4bf..3745e1bf6c5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -111,6 +111,14 @@ public int[] sumStrandCountsOverSamples(final VariantContext vc, final boolean i return result; } +// public int[] sumStrandCountsOverSamplesByAllele(final VariantContext vc, final boolean includeTumor, final boolean includeNormal) { +// final int[] result = new int[4]; +// vc.getGenotypes().stream().filter(g -> (includeTumor && isTumor(g)) || (includeNormal && isNormal(g))) +// .filter(g -> g.hasExtendedAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) +// .map(g -> StrandBiasTest.getStrandCounts(g)).forEach(sbbs -> new IndexRange(0, 4).forEach(n -> result[n] += 
sbbs[n])); +// return result; +// } + public double[] weightedAverageOfTumorAFs(final VariantContext vc) { final MutableDouble totalWeight = new MutableDouble(0); final double[] AFs = new double[vc.getNAlleles() - 1]; @@ -264,7 +272,7 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { alleleFilters.add(new TumorEvidenceFilter()); alleleFilters.add(new BaseQualityFilter(MTFAC.minMedianBaseQuality)); alleleFilters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); - filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); + alleleFilters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); filters.add(new StrandArtifactFilter()); filters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); filters.add(new PanelOfNormalsFilter()); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index ef65e915b2c..144491b2801 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -561,6 +561,7 @@ public void testFilterMitochondria(File unfiltered, final double minAlleleFracti return args; }); + // add tests for DUPLICATE final List> actualFilters = VariantContextTestUtils.streamVcf(filteredVcf) .map(VariantContext::getFilters).collect(Collectors.toList()); From 8b85de2140cc2d4ea9731e84d5781138a99510be Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 6 Dec 2019 15:27:02 -0500 Subject: [PATCH 07/85] undo bad delim change --- .../hellbender/tools/walkers/annotator/AnnotationUtils.java | 6 +++++- .../tools/walkers/mutect/Mutect2IntegrationTest.java | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java index 3858a248dd5..892b45a22d6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java @@ -44,13 +44,17 @@ public static String encodeAnyASList( final List somethingList) { return StringUtils.join(somethingList, ALLELE_SPECIFIC_PRINT_DELIM).replaceAll(BRACKET_REGEX, ""); //Who actually wants brackets at the ends of their string? Who??? } + public static List decodeAnyASListWithPrintDelim( final String somethingList) { + return Arrays.asList(StringUtils.splitByWholeSeparatorPreserveAllTokens(somethingList.replaceAll(BRACKET_REGEX, ""), ALLELE_SPECIFIC_PRINT_DELIM)); + } + /** * Helper function to convert a comma-separated String (such as a vc.getAttrbute().toString() output) to a List of Strings * @param somethingList the allele-specific annotations string; may have brackets * @return a list of allele-specific annotation entries */ public static List decodeAnyASList( final String somethingList) { - return Arrays.asList(StringUtils.splitByWholeSeparatorPreserveAllTokens(somethingList.replaceAll(BRACKET_REGEX, ""), ALLELE_SPECIFIC_PRINT_DELIM)); + return Arrays.asList(StringUtils.splitByWholeSeparatorPreserveAllTokens(somethingList.replaceAll(BRACKET_REGEX, ""), ALLELE_SPECIFIC_REDUCED_DELIM)); } /** diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 144491b2801..5b7ef3202ae 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -566,7 +566,7 @@ 
public void testFilterMitochondria(File unfiltered, final double minAlleleFracti .map(VariantContext::getFilters).collect(Collectors.toList()); final List> actualASFilters = VariantContextTestUtils.streamVcf(filteredVcf) - .map(vc -> AnnotationUtils.decodeAnyASList(vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, ""))).collect(Collectors.toList()); + .map(vc -> AnnotationUtils.decodeAnyASListWithPrintDelim(vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, ""))).collect(Collectors.toList()); Assert.assertEquals(actualASFilters, expectedASFilters); Assert.assertEquals(actualFilters.size(), expectedFilters.size()); From 27b49be53a6cf5c40e11d999498a6aa1d14899fb Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 3 Jan 2020 17:29:42 -0500 Subject: [PATCH 08/85] update min allele fraction filter --- .../filtering/MinAlleleFractionFilter.java | 33 +++++++++++++------ .../mutect/Mutect2IntegrationTest.java | 8 ++--- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java index b7baedfa823..e5942a7377d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java @@ -1,13 +1,21 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; +import org.apache.commons.lang.mutable.MutableBoolean; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils; import 
org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; import java.util.*; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.DoubleStream; import java.util.stream.IntStream; -public class MinAlleleFractionFilter extends HardFilter { +public class MinAlleleFractionFilter extends HardAlleleFilter { private final double minAf; public MinAlleleFractionFilter(final double minAf) { @@ -17,16 +25,21 @@ public MinAlleleFractionFilter(final double minAf) { @Override public ErrorType errorType() { return ErrorType.ARTIFACT; } + public Predicate checkPreconditions() { + return g -> g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY); + } + + public List getAltData(Genotype g) { + double[] data = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(g, GATKVCFConstants.ALLELE_FRACTION_KEY, () -> null, 1.0); + return Arrays.stream(data).boxed().collect(Collectors.toList()); + } + @Override - public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) { - return vc.getGenotypes().stream().filter(filteringEngine::isTumor) - .filter(g -> g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY)) - .anyMatch(g -> { - final double[] alleleFractions = VariantContextGetters.getAttributeAsDoubleArray(g, GATKVCFConstants.ALLELE_FRACTION_KEY, () -> null, 1.0); - final int numRealAlleles = vc.hasSymbolicAlleles() ? 
alleleFractions.length - 1 : alleleFractions.length; - final OptionalDouble max = IntStream.range(0, numRealAlleles).mapToDouble(a -> alleleFractions[a]).max(); - return max.getAsDouble() < minAf; - }); + public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + LinkedHashMap> dataByAllele = getAltDataByAllele(vc, checkPreconditions(), this::getAltData, filteringEngine); + return dataByAllele.entrySet().stream() + .filter(entry -> !entry.getKey().isSymbolic() && !vc.getReference().equals(entry.getKey())) + .map(entry -> entry.getValue().stream().max(Double::compare).orElse(1.0) < minAf).collect(Collectors.toList()); } @Override diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 5b7ef3202ae..af801ef3fd7 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -522,8 +522,8 @@ public Object[][] vcfsForFiltering() { Arrays.asList( Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, possible_numt - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4 , GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, possible_numt|possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + 
GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, low_allele_frac, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, low_allele_frac, possible_numt|low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4) // .|PASS )}, @@ -539,8 +539,8 @@ public Object[][] vcfsForFiltering() { Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.EMPTY_INFO_FIELD), //".|.", Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual|.", Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // ".|weak_evidence, possible_numt|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|PASS|weak_evidence, base_qual|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // ".|weak_evidence, base_qual, map_qual, possible_numt|." 
+ Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|PASS|weak_evidence, base_qual, low_allele_frac|.", + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // ".|weak_evidence, base_qual, map_qual, position, low_allele_frac, possible_numt|." )} }; } From 1568a0e5ba355631ee213d2467754c50aa5a257e Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 6 Jan 2020 13:04:45 -0500 Subject: [PATCH 09/85] update read pos filter --- .../mutect/filtering/ReadPositionFilter.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java index fbfdfc0fe7c..ab372041a64 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java @@ -1,13 +1,14 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; import htsjdk.variant.variantcontext.VariantContext; -import org.broadinstitute.hellbender.tools.walkers.annotator.ReadPosition; +import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import java.util.Collections; import java.util.List; +import 
java.util.stream.Collectors; -public class ReadPositionFilter extends HardFilter { +public class ReadPositionFilter extends HardAlleleFilter { private final double minMedianReadPosition; public ReadPositionFilter(final double minMedianReadPosition) { @@ -18,11 +19,11 @@ public ReadPositionFilter(final double minMedianReadPosition) { public ErrorType errorType() { return ErrorType.ARTIFACT; } @Override - public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) { + public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { final List readPositionByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_READ_POSITON_KEY, 0); - - // a negative value is possible due to a bug: https://github.com/broadinstitute/gatk/issues/5492 - return readPositionByAllele.get(0) > -1 && readPositionByAllele.get(0) < minMedianReadPosition; + return readPositionByAllele.stream() + // a negative value is possible due to a bug: https://github.com/broadinstitute/gatk/issues/5492 + .map(readPos -> readPos > -1 && readPos < minMedianReadPosition).collect(Collectors.toList()); } @Override From 2b045c0a5f5eec8b98360e3461bcf5b2d82d701f Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 6 Jan 2020 16:49:19 -0500 Subject: [PATCH 10/85] 2 different get data methods --- .../walkers/mutect/filtering/Mutect2AlleleFilter.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index 35c02b84c6a..410117deea6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -17,6 +17,16 @@ public abstract class 
Mutect2AlleleFilter extends Mutect2Filter { public LinkedHashMap> getDataByAllele(final VariantContext vc, Predicate preconditions, Function> getData, final Mutect2FilteringEngine filteringEngine) { // create and initialize a map with all the alleles in the vc as keys and new, empty lists as values LinkedHashMap> dataByAllele = vc.getAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> new ArrayList<>(), (a, b) -> a, () -> new LinkedHashMap<>())); + return combineDataByAllele(dataByAllele, vc, preconditions, getData, filteringEngine); + } + + public LinkedHashMap> getAltDataByAllele(final VariantContext vc, Predicate preconditions, Function> getAltData, final Mutect2FilteringEngine filteringEngine) { + // create and initialize a map with all the alleles in the vc as keys and new, empty lists as values + LinkedHashMap> dataByAllele = vc.getAlternateAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> new ArrayList<>(), (a, b) -> a, () -> new LinkedHashMap<>())); + return combineDataByAllele(dataByAllele, vc, preconditions, getAltData, filteringEngine); + } + + private LinkedHashMap> combineDataByAllele(final LinkedHashMap> dataByAllele, final VariantContext vc, Predicate preconditions, Function> getData, final Mutect2FilteringEngine filteringEngine) { // pull all the allele specific data out of each genotype (that passes preconditions) and add it to the list for the allele vc.getGenotypes().stream().filter(preconditions).filter(filteringEngine::isTumor) From 409aff292a2daf802920089d4d5559f0da8ee637 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 7 Jan 2020 10:38:27 -0500 Subject: [PATCH 11/85] use correct filter list --- .../walkers/mutect/filtering/Mutect2FilteringEngine.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 3745e1bf6c5..bb2ad6774b4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -279,8 +279,8 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { filters.add(new NormalArtifactFilter(MTFAC.normalPileupPValueThreshold)); filters.add(new NRatioFilter(MTFAC.nRatio)); filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); - filters.add(new ReadPositionFilter(MTFAC.minMedianReadPosition)); - filters.add(new MinAlleleFractionFilter(MTFAC.minAf)); + alleleFilters.add(new ReadPositionFilter(MTFAC.minMedianReadPosition)); + alleleFilters.add(new MinAlleleFractionFilter(MTFAC.minAf)); if (!MTFAC.readOrientationPriorTarGzs.isEmpty()) { final List artifactTables = MTFAC.readOrientationPriorTarGzs.stream().flatMap(tarGz -> { From e54983c21a9c950cc0adacbc36d130ecacf087b7 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 7 Jan 2020 13:39:47 -0500 Subject: [PATCH 12/85] fix issues with filters containing data for ref --- .../mutect/filtering/Mutect2AlleleFilter.java | 52 +++---------------- .../filtering/Mutect2FilteringEngine.java | 22 ++++---- .../mutect/filtering/ReadPositionFilter.java | 3 +- .../mutect/filtering/TumorEvidenceFilter.java | 2 +- 4 files changed, 20 insertions(+), 59 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index 410117deea6..d9a3449f583 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ 
-41,51 +41,13 @@ private LinkedHashMap> combineDataByAllele(final LinkedHashMap applyFilter(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { -// // create and initialize a map with all the alleles in the vc as keys and new, empty lists as values -// LinkedHashMap> dataByAllele = vc.getAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> new ArrayList<>(), (a, b) -> a, () -> new LinkedHashMap<>())); -// -// // pull all the allele specific data out of each genotype (that passes preconditions) and add it to the list for the allele -// vc.getGenotypes().stream().filter(filteringEngine::isTumor) -// .filter(checkPreconditions()) -// .forEach(g -> { -// Iterator alleleDataIterator = getData(g).iterator(); -// Iterator> dataByAlleleIterator = dataByAllele.values().iterator(); -// while(alleleDataIterator.hasNext() && dataByAlleleIterator.hasNext()) -// dataByAlleleIterator.next().add(alleleDataIterator.next()); -// -// }); -// -// // construct output map with defaults -//// LinkedHashMap probabilityByAllele = vc.getAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> this.getDefaultProbability(), (a, b) -> null, () -> new LinkedHashMap<>())); -// -// // now invoke the filter giving it the map with the data separated by allele -// List probabilityByAllele = calculateErrorProbabilityForAlleles(dataByAllele, vc, filteringEngine, referenceContext); -// return probabilityByAllele; -// } - -// /** -// * Subclasses should override if they want a different default probability. -// * Keep in mind that in the final output, NaN is used to determine when indicating that no probability was computed, and . 
will be the output for those alleles -// * @return the default probability to use -// */ -// public Double getDefaultProbability() { -// return Double.NaN; -// } -// -// /** -// * All subclass filters should implement if they need to verify the data needed exists in the genotype -// * @return a predicate that will be applied to determine which genotypes will be part of the filter -// */ -// public abstract Predicate checkPreconditions(); - -// /** -// * All subclass filters should implement this method to return the necessary data needed to apply the filter -// * @param g the genotype to pull the data from -// * @return A list of per-allele data for each allele in the variant context (the data in the genotype is ordered by the alleles returned from vc.getAlleles() -// */ -// public abstract List getData(Genotype g); - + /** + * + * @param vc + * @param filteringEngine + * @param referenceContext + * @return The probability that each alternate allele should be filtered out. This list should NOT include data for the reference allele + */ public List errorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { return requiredAnnotations().stream().allMatch(vc::hasAttribute) ? 
calculateErrorProbabilityForAlleles(vc, filteringEngine, referenceContext) : diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index bb2ad6774b4..4ffdcddfa31 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -111,14 +111,6 @@ public int[] sumStrandCountsOverSamples(final VariantContext vc, final boolean i return result; } -// public int[] sumStrandCountsOverSamplesByAllele(final VariantContext vc, final boolean includeTumor, final boolean includeNormal) { -// final int[] result = new int[4]; -// vc.getGenotypes().stream().filter(g -> (includeTumor && isTumor(g)) || (includeNormal && isNormal(g))) -// .filter(g -> g.hasExtendedAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) -// .map(g -> StrandBiasTest.getStrandCounts(g)).forEach(sbbs -> new IndexRange(0, 4).forEach(n -> result[n] += sbbs[n])); -// return result; -// } - public double[] weightedAverageOfTumorAFs(final VariantContext vc) { final MutableDouble totalWeight = new MutableDouble(0); final double[] AFs = new double[vc.getNAlleles() - 1]; @@ -274,14 +266,20 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { alleleFilters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); alleleFilters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); filters.add(new StrandArtifactFilter()); - filters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); - filters.add(new PanelOfNormalsFilter()); - filters.add(new NormalArtifactFilter(MTFAC.normalPileupPValueThreshold)); - filters.add(new NRatioFilter(MTFAC.nRatio)); + alleleFilters.add(new 
ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); alleleFilters.add(new ReadPositionFilter(MTFAC.minMedianReadPosition)); alleleFilters.add(new MinAlleleFractionFilter(MTFAC.minAf)); + // convert to allele specific later + // Normal Artifact Filter doesn't apply to mitochondria because we are not comparing + // tumor and normal + filters.add(new NormalArtifactFilter(MTFAC.normalPileupPValueThreshold)); + filters.add(new NRatioFilter(MTFAC.nRatio)); + + // filters that don't apply to specific alleles + filters.add(new PanelOfNormalsFilter()); + if (!MTFAC.readOrientationPriorTarGzs.isEmpty()) { final List artifactTables = MTFAC.readOrientationPriorTarGzs.stream().flatMap(tarGz -> { final File extractDir = IOUtils.createTempDir("extract"); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java index ab372041a64..ba229619baa 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java @@ -20,8 +20,9 @@ public ReadPositionFilter(final double minMedianReadPosition) { @Override public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + // MPOS doesn't have data for ref allele final List readPositionByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_READ_POSITON_KEY, 0); - return readPositionByAllele.stream() + return readPositionByAllele.subList(0, readPositionByAllele.size()).stream() // a negative value is possible due to a bug: https://github.com/broadinstitute/gatk/issues/5492 .map(readPos -> readPos > -1 && readPos < minMedianReadPosition).collect(Collectors.toList()); } 
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java index b3d3ecffb81..2994f07687d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java @@ -23,7 +23,7 @@ protected List calculateErrorProbabilityForAlleles(final VariantContext SomaticClusteringModel model = filteringEngine.getSomaticClusteringModel(); List altResults = new ArrayList<>(); - new IndexRange(0, tumorLods.length).forEach(i -> + new IndexRange(1, tumorLods.length).forEach(i -> altResults.add(model.probabilityOfSequencingError(new Datum(tumorLods[i], 0, 0, ADs[i+1], totalCount, SomaticClusteringModel.indelLength(vc, i))))); return altResults; From 9f7891786067a0deb47d882bd6c7cce1991ab964 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 10 Jan 2020 13:18:08 -0500 Subject: [PATCH 13/85] wip, doesn't pass tests, fixing error prob for threshold --- .../mutect/filtering/ContaminationFilter.java | 52 ++++++++++++++++++- .../filtering/Mutect2FilteringEngine.java | 2 + .../filtering/StrictStrandBiasFilter.java | 46 ++++++++++++++-- .../mutect/filtering/TumorEvidenceFilter.java | 3 +- .../mutect/Mutect2IntegrationTest.java | 4 +- 5 files changed, 100 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java index 3c976e9b218..bc465f67b5c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java @@ -3,9 +3,12 @@ import
htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFConstants; +import org.apache.commons.lang.math.IntRange; import org.apache.commons.lang3.tuple.ImmutablePair; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.contamination.ContaminationRecord; +import org.broadinstitute.hellbender.tools.walkers.mutect.clustering.SomaticClusteringModel; +import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; @@ -14,7 +17,7 @@ import java.util.*; import java.util.stream.Collectors; -public class ContaminationFilter extends Mutect2VariantFilter { +public class ContaminationFilter extends Mutect2AlleleFilter { private final Map contaminationBySample; private final double defaultContamination; private final double EPSILON = 1.0e-10; @@ -31,6 +34,53 @@ public ContaminationFilter(final List contaminationTables, final double co public ErrorType errorType() { return ErrorType.NON_SOMATIC; } @Override + public List calculateErrorProbabilityForAlleles(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + // for every alt allele, a list of the depth and posterior pair + final List>> depthsAndPosteriorsPerAllele = new ArrayList<>(); + new IndexRange(0, vc.getNAlleles()-1).forEach(i -> depthsAndPosteriorsPerAllele.add(new ArrayList<>())); + + for (final Genotype tumorGenotype : vc.getGenotypes()) { + if (filteringEngine.isNormal(tumorGenotype)) { + continue; + } + + final double contaminationFromFile = contaminationBySample.getOrDefault(tumorGenotype.getSampleName(), defaultContamination); + final double contamination = Math.max(0, Math.min(contaminationFromFile, 1 - EPSILON)); // handle file with contamination 
== 1 + final int[] ADs = tumorGenotype.getAD(); // AD is all alleles, while AF is alts only, hence the +1 offset + final int totalAD = (int) MathUtils.sum(ADs); + final int[] altADs = Arrays.copyOfRange(ADs, 1, ADs.length); + // POPAF has only alt allele data + final double[] negativeLog10AlleleFrequencies = VariantContextGetters.getAttributeAsDoubleArray(vc, + GATKVCFConstants.POPULATION_AF_KEY, () -> new double[]{Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY}, Double.POSITIVE_INFINITY); + final double[] alleleFrequencies = MathUtils.applyToArray(negativeLog10AlleleFrequencies, x -> Math.pow(10,-x)); + + SomaticClusteringModel model = filteringEngine.getSomaticClusteringModel(); + final double[] logSomaticLikelihoodPerAllele = Arrays.stream(altADs).mapToDouble(altCount -> model.logLikelihoodGivenSomatic(totalAD, altCount)).toArray(); + + double[] singleContaminantLikelihoodPerAllele = new double[alleleFrequencies.length]; + double[] manyContaminantLikelihoodPerAllele = new double[alleleFrequencies.length]; + double[] logContaminantLikelihoodPerAllele = new double[alleleFrequencies.length]; + double[] logOddsOfRealVsContaminationPerAllele = new double[alleleFrequencies.length]; + double[] posteriorProbOfContaminationPerAllele = new double[alleleFrequencies.length]; + new IndexRange(0,alleleFrequencies.length).forEach(i -> { + singleContaminantLikelihoodPerAllele[i] = 2 * alleleFrequencies[i] * (1 - alleleFrequencies[i]) * MathUtils.binomialProbability(totalAD, altADs[i], contamination /2) + + MathUtils.square(alleleFrequencies[i]) * MathUtils.binomialProbability(totalAD, altADs[i], contamination); + manyContaminantLikelihoodPerAllele[i] = MathUtils.binomialProbability(totalAD, altADs[i], contamination * alleleFrequencies[i]); + logContaminantLikelihoodPerAllele[i] = Math.log(Math.max(singleContaminantLikelihoodPerAllele[i], manyContaminantLikelihoodPerAllele[i])); + logOddsOfRealVsContaminationPerAllele[i] = logSomaticLikelihoodPerAllele[i] - 
logContaminantLikelihoodPerAllele[i]; + }); + + new IndexRange(0,alleleFrequencies.length).forEach(i -> { + posteriorProbOfContaminationPerAllele[i] = filteringEngine.posteriorProbabilityOfError(vc, logOddsOfRealVsContaminationPerAllele[i], i); + depthsAndPosteriorsPerAllele.get(i).add(ImmutablePair.of(altADs[i], posteriorProbOfContaminationPerAllele[i])); + }); + + } + + return depthsAndPosteriorsPerAllele.stream().map(alleleData -> alleleData.isEmpty() ? Double.NaN : weightedMedianPosteriorProbability(alleleData)).collect(Collectors.toList()); + } + +// @Override public double calculateErrorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { final List> depthsAndPosteriors = new ArrayList<>(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 4ffdcddfa31..6656f684563 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -239,6 +239,8 @@ private String getMergedFilterStringForAllele(List> alleleSpeci * @return Iterator of filters for an allele */ private Iterator addFilterStrings(List probabilities, List siteFilters, String filterName) { + double thresh = getThreshold(); + double min = Math.min(1 - EPSILON, Math.max(EPSILON, thresh)); List results = probabilities.stream().map(value -> value > Math.min(1 - EPSILON, Math.max(EPSILON, getThreshold())) ? 
filterName : VCFConstants.PASSES_FILTERS_v4).collect(Collectors.toList()); // List realFilters = results.stream().filter(x -> !x.equals(VCFConstants.EMPTY_INFO_FIELD)).collect(Collectors.toList()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java index 66217bd6181..d0904f4045f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java @@ -1,15 +1,22 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import org.apache.commons.lang3.mutable.MutableInt; +import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.annotator.StrandBiasBySample; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; +import java.util.Arrays; import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; +import java.util.function.Predicate; +import java.util.stream.Collectors; -public class StrictStrandBiasFilter extends HardFilter { +public class StrictStrandBiasFilter extends HardFilter { //HardAlleleFilter { private final int minReadsOnEachStrand; public StrictStrandBiasFilter(final int minReadsOnEachStrand) { @@ -19,6 +26,38 @@ public StrictStrandBiasFilter(final int minReadsOnEachStrand) { @Override public ErrorType errorType() { return ErrorType.ARTIFACT; } + public Predicate checkPreconditions() { + return g -> g.hasExtendedAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY); + } + + public List getData(Genotype g) { + 
int[] data = GATKProtectedVariantContextUtils.getAttributeAsIntArray(g, GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, () -> null, 0); + return Arrays.stream(data).boxed().collect(Collectors.toList()); + } + +// @Override +// public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { +// final MutableInt altForwardCount = new MutableInt(0); +// final MutableInt altReverseCount = new MutableInt(0); + +// LinkedHashMap> dataByAllele = getDataByAllele(vc, checkPreconditions(), this::getData, filteringEngine); +// return dataByAllele.entrySet().stream() +// .filter(entry -> !entry.getKey().isSymbolic() && !vc.getReference().equals(entry.getKey())) +// .map(entry -> minReadsOnEachStrand > 0 && entry.getValue().stream().min(Integer::compare).orElse(0) < minReadsOnEachStrand).collect(Collectors.toList()); + + +// vc.getGenotypes().stream().filter(filteringEngine::isTumor) +// .filter(g -> g.hasExtendedAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) +// .forEach(g -> { +// final int[] strandBiasCounts = GATKProtectedVariantContextUtils.getAttributeAsIntArray(g, GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, () -> null, 0); +// altForwardCount.add(StrandBiasBySample.getAltForwardCountFromFlattenedContingencyTable(strandBiasCounts)); +// altReverseCount.add(StrandBiasBySample.getAltReverseCountFromFlattenedContingencyTable(strandBiasCounts)); +// }); +// +// // filter if there is no alt evidence in the forward or reverse strand +// return Math.min(altForwardCount.getValue(), altReverseCount.getValue()) < minReadsOnEachStrand; +//} + @Override public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) { if (minReadsOnEachStrand == 0) { @@ -36,9 +75,10 @@ public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine altReverseCount.add(StrandBiasBySample.getAltReverseCountFromFlattenedContingencyTable(strandBiasCounts)); }); - // filter if 
there is no alt evidence in the forward or reverse strand + // filter if there is no alt evidence in the forward or reverse strand return Math.min(altForwardCount.getValue(), altReverseCount.getValue()) < minReadsOnEachStrand; -} + } + @Override public String filterName() { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java index 2994f07687d..faebb1c72ac 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java @@ -23,7 +23,8 @@ protected List calculateErrorProbabilityForAlleles(final VariantContext SomaticClusteringModel model = filteringEngine.getSomaticClusteringModel(); List altResults = new ArrayList<>(); - new IndexRange(1, tumorLods.length).forEach(i -> + // 0 is the correct value. 
problem with threshold + new IndexRange(0, tumorLods.length).forEach(i -> altResults.add(model.probabilityOfSequencingError(new Datum(tumorLods[i], 0, 0, ADs[i+1], totalCount, SomaticClusteringModel.indelLength(vc, i))))); return altResults; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index af801ef3fd7..0655947dbbb 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -532,7 +532,7 @@ public Object[][] vcfsForFiltering() { ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME), ImmutableSet.of(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME), Collections.emptySet(), - ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.CONTAMINATION_FILTER_NAME, + ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, //GATKVCFConstants.CONTAMINATION_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME)), Arrays.asList( @@ -540,7 +540,7 @@ public Object[][] vcfsForFiltering() { Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual|.", Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // ".|weak_evidence, possible_numt|.", Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, 
VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|PASS|weak_evidence, base_qual, low_allele_frac|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // ".|weak_evidence, base_qual, map_qual, position, low_allele_frac, possible_numt|." + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME /*+ ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME*/ + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // ".|weak_evidence, base_qual, map_qual, contamination, position, low_allele_frac, possible_numt|." 
)} }; } From 8e0e7d4cfcbd97d4e5095140ef84d99f8a536370 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 13 Jan 2020 11:32:11 -0500 Subject: [PATCH 14/85] implement 2 pass variant walker as post processing filter step for low heteroplasmy --- .../filtering/MTLowHeteroplasmyFilter.java | 101 ++++++++++++++++++ .../utils/variant/GATKVCFConstants.java | 1 + .../utils/variant/GATKVCFHeaderLines.java | 1 + .../mutect/Mutect2IntegrationTest.java | 12 +++ .../MTLowHeteroplasmyFilterTest.java | 35 ++++++ .../mito/expected_LowHetNone_output.txt | 69 ++++++++++++ ...HetVariantWalkerIntegrationTest_output.txt | 69 ++++++++++++ .../hellbender/tools/mutect/mito/filtered.vcf | 68 ++++++++++++ 8 files changed, 356 insertions(+) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilter.java create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTest.java create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilter.java new file mode 100644 index 00000000000..b8b28239742 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilter.java @@ -0,0 +1,101 @@ +package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; + +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import 
htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.VCFFilterHeaderLine; +import htsjdk.variant.vcf.VCFHeader; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.ReadsContext; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.engine.TwoPassVariantWalker; +import org.broadinstitute.hellbender.utils.MathUtils; +import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import picard.cmdline.programgroups.VariantFilteringProgramGroup; + +import java.io.File; + +import static org.broadinstitute.hellbender.utils.variant.GATKVCFConstants.LOW_HET_FILTER_NAME; + +@CommandLineProgramProperties( + summary = "If too many low heteroplasmy sites pass other filters, then filter all low heteroplasmy sites", + oneLineSummary = "If too many low het sites, filter all low het sites", + programGroup = VariantFilteringProgramGroup.class +) +public class MTLowHeteroplasmyFilter extends TwoPassVariantWalker { + + public static final String MIN_LOW_HET_SITES_LONG_NAME = "min-low-het-sites"; + public static final String LOW_HET_THRESHOLD_LONG_NAME = "low-het-threshold"; + + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, + doc = "Output VCF file") + private String outputVcf = null; + + @Argument(fullName = MIN_LOW_HET_SITES_LONG_NAME, + doc = "Number of low het sites allowed to pass other filters before filtering out all low het sites. Default is 5", + optional=true) + private int minLowHetSites = 3; + + @Argument(fullName = LOW_HET_THRESHOLD_LONG_NAME, + doc = "Threshold for determining a low heteroplasmy site. 
Default is 0.1", + optional=true) + private final double lowHetThreshold = 0.1; + + private boolean failedLowHet = false; + private int unfilteredLowHetSites = 0; + + private VariantContextWriter vcfWriter; + + @Override + public void onTraversalStart() { + final VCFHeader inputHeader = getHeaderForVariants(); + // TODO why isn't it being added in the GATKVCFHeaderLines + inputHeader.addMetaDataLine(new VCFFilterHeaderLine(LOW_HET_FILTER_NAME, "All low heteroplasmy sites are filtered when at least x low het sites pass all other filters")); + vcfWriter = createVCFWriter(new File(outputVcf)); + vcfWriter.writeHeader(inputHeader); + } + + @Override + protected void firstPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { + // if the site is not filtered, but it is low het increment counter + if (variant.isNotFiltered() && isLowHeteroplasmy(variant)) { + unfilteredLowHetSites++; + } + } + + @Override + protected void afterFirstPass() { + failedLowHet = unfilteredLowHetSites > minLowHetSites; + } + + @Override + protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { + VariantContextBuilder vcb = new VariantContextBuilder(variant); + if (failedLowHet && isLowHeteroplasmy(variant)) { + vcb.filter(GATKVCFConstants.LOW_HET_FILTER_NAME); + } + vcfWriter.add(vcb.make()); + } + + @Override + public void closeTool() { + if ( vcfWriter != null ) { + vcfWriter.close(); + } + } + + protected boolean isLowHeteroplasmy(VariantContext v) { + // does 0.0 make sense for orElse? 
+ return v.getGenotypes().stream().map(g -> lowestAF(g)).min(Double::compareTo).orElse(0.0) < lowHetThreshold; + } + + protected double lowestAF(Genotype g) { + int[] depths = g.getAD(); + return MathUtils.arrayMin(depths)/ MathUtils.sum(depths); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java index 89c68f72900..17501221ee4 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java @@ -164,6 +164,7 @@ their names (or descriptions) depend on some threshold. Those filters are not i public final static String CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME = "numt_chimera"; //mitochondria public final static String ALLELE_FRACTION_FILTER_NAME = "low_allele_frac"; public static final String POSSIBLE_NUMT_FILTER_NAME = "possible_numt"; + public static final String LOW_HET_FILTER_NAME = "low_het"; public static final List MUTECT_FILTER_NAMES = Arrays.asList(VCFConstants.PASSES_FILTERS_v4, POLYMERASE_SLIPPAGE, PON_FILTER_NAME, CLUSTERED_EVENTS_FILTER_NAME, TUMOR_EVIDENCE_FILTER_NAME, GERMLINE_RISK_FILTER_NAME, diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java index 5874f1a64d4..5db25e67391 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java @@ -103,6 +103,7 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf //Mitochondrial M2-related filters addFilterLine(new VCFFilterHeaderLine(CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME, "NuMT variant with too many ALT reads originally from autosome")); addFilterLine(new 
VCFFilterHeaderLine(POSSIBLE_NUMT_FILTER_NAME, "Allele depth is below expected coverage of NuMT in autosome")); + addFilterLine(new VCFFilterHeaderLine(LOW_HET_FILTER_NAME, "All low heteroplasmy sites are filtered when at least x low het sites pass all other filters")); addFormatLine(new VCFFormatHeaderLine(ALLELE_BALANCE_KEY, 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); addFormatLine(new VCFFormatHeaderLine(MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY, 1, VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 0655947dbbb..e7df36d71b1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -14,6 +14,7 @@ import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection; import org.broadinstitute.hellbender.engine.FeatureDataSource; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; +import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReadThreadingAssemblerArgumentCollection; @@ -578,6 +579,17 @@ public void testFilterMitochondria(File unfiltered, final double minAlleleFracti Assert.assertEquals(actualFilters, expectedFilters); } +// @Test +// public void testLowHetTool() throws IOException { +// final IntegrationTestSpec testSpec = new IntegrationTestSpec( +// " -R " + MITO_REF.getAbsolutePath() + +// " -V " + NA12878_MITO_FILTERED_VCF + +// " -O %s", +// 
Arrays.asList(TEST_OUTPUT_DIRECTORY + "expected_ExampleVariantWalkerIntegrationTest_output.txt") +// ); +// testSpec.executeTest("testExampleVariantWalker", this); +// } + @Test public void testMitochondrialRefConf() { Utils.resetRandomGenerator(); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTest.java new file mode 100644 index 00000000000..1a84a724c75 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTest.java @@ -0,0 +1,35 @@ +package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; + +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; + +public class MTLowHeteroplasmyFilterTest extends CommandLineProgramTest { + private static final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta"); + private static final File NA12878_MITO_FILTERED_VCF = new File(toolsTestDir, "mutect/mito/filtered.vcf"); + + @Test + public void testLowHetVariantWalker() throws IOException { + final IntegrationTestSpec testSpec = new IntegrationTestSpec( + " -R " + MITO_REF.getAbsolutePath() + + " -V " + NA12878_MITO_FILTERED_VCF + + " -O %s", + Arrays.asList(toolsTestDir + "mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt") + ); + testSpec.executeTest("testLowHetVariantWalker", this); + + final IntegrationTestSpec testLowHetNoneSpec = new IntegrationTestSpec( + " -R " + MITO_REF.getAbsolutePath() + + " -V " + NA12878_MITO_FILTERED_VCF + + " -O %s" + + " --min-low-het-sites 5", + Arrays.asList(toolsTestDir + "mutect/mito/expected_LowHetNone_output.txt") + ); + 
testLowHetNoneSpec.executeTest("testLowHetVariantWalker", this); + } + +} diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.txt b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.txt new file mode 100644 index 00000000000..976f3abac55 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.txt @@ -0,0 +1,69 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##Mutect Version=2.1 +##contig= +##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. +##source=FilterMutectCalls +##source=Mutect2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chrM 152 . T C . PASS AS_FilterStatus=.|PASS;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +chrM 263 . A G . numt_chimera AS_FilterStatus=.|PASS;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +chrM 301 . A AC . 
low_allele_frac;possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +chrM 302 . A AC,C,ACC . PASS AS_FilterStatus=.|PASS|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 310 . T TC . PASS AS_FilterStatus=.|PASS;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 750 . A G . PASS AS_FilterStatus=.|PASS;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt new file mode 100644 index 00000000000..b8b7328183e --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt @@ -0,0 +1,69 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= 
+##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##Mutect Version=2.1 +##contig= +##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. +##source=FilterMutectCalls +##source=Mutect2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chrM 152 . T C . low_het AS_FilterStatus=.|PASS;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +chrM 263 . A G . low_het;numt_chimera AS_FilterStatus=.|PASS;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +chrM 301 . A AC . low_allele_frac;low_het;possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +chrM 302 . A AC,C,ACC . low_het AS_FilterStatus=.|PASS|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 310 . T TC . low_het AS_FilterStatus=.|PASS;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 750 . A G . 
low_het AS_FilterStatus=.|PASS;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf new file mode 100644 index 00000000000..c72b2c7b6ab --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf @@ -0,0 +1,68 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##Mutect Version=2.1 +##contig= +##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. +##source=FilterMutectCalls +##source=Mutect2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chrM 152 . T C . PASS AS_FilterStatus=.|PASS;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +chrM 263 . A G . numt_chimera AS_FilterStatus=.|PASS;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +chrM 301 . A AC . 
low_allele_frac;possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +chrM 302 . A AC,C,ACC . PASS AS_FilterStatus=.|PASS|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 310 . T TC . PASS AS_FilterStatus=.|PASS;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 750 . A G . PASS AS_FilterStatus=.|PASS;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true From 660c4f9e0c3b174add645baade6d89f49214d9f1 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 13 Jan 2020 20:05:36 -0500 Subject: [PATCH 15/85] wip - got error prob to compile --- .../mutect/filtering/ErrorProbabilities.java | 74 ++++++++++++------- .../mutect/filtering/Mutect2AlleleFilter.java | 3 +- .../mutect/filtering/Mutect2Filter.java | 12 ++- .../filtering/Mutect2VariantFilter.java | 15 +++- 4 files changed, 74 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index a1268a79eb8..ce620a14087 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -3,40 +3,64 @@ 
import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2; import java.util.*; -import java.util.function.DoubleSupplier; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.util.stream.Collectors.*; public final class ErrorProbabilities { - private final Map probabilitiesByFilter; - private final LinkedHashMap, List> probabilitiesByFilterAndAllele; - private final EnumMap probabilitiesByType; - private final double errorProbability; + private final LinkedHashMap> probabilitiesByFilterAndAllele; + private final LinkedHashMap>> probabilitiesByAllelesForEachFilter; + private final List errorProbabilityByAllele; + private final Map> probabilitiesByTypeAndAllele; + private final int numAltAlleles; - public ErrorProbabilities(final List variantFilters, final List> alleleFilters, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, final ReferenceContext referenceContext) { - probabilitiesByFilter = variantFilters.stream().collect(Collectors.toMap(Function.identity(), f -> f.errorProbability(vc, filteringEngine, referenceContext))); - probabilitiesByFilterAndAllele = alleleFilters.stream().collect(Collectors.toMap(Function.identity(), f -> f.errorProbability(vc, filteringEngine, referenceContext), (a,b) -> a, () -> new LinkedHashMap<>())); - probabilitiesByType = Arrays.stream(ErrorType.values()).collect(Collectors.toMap(v -> v, v -> 0.0, (a,b) -> a, () -> new EnumMap<>(ErrorType.class))); - variantFilters.forEach(f -> probabilitiesByType.compute(f.errorType(), (type,prob) -> Math.max(prob, probabilitiesByFilter.get(f)))); -// alleleFilters.forEach(f -> probabilitiesByType.compute(f.errorType(), (type,prob) -> -// Math.max(prob, -// probabilitiesByFilterAndAllele.get(f).values().stream().filter(d -> 
!d.isNaN()).max(Double::compare).orElseGet(() -> 0.0)))); + public ErrorProbabilities(final List filters, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, final ReferenceContext referenceContext) { + numAltAlleles = vc.getAlternateAlleles().size(); +// EnumMap> filterByType = filters.stream() +// .collect(groupingBy(Mutect2Filter::errorType, () -> new EnumMap<>(ErrorType.class), toList())); + probabilitiesByFilterAndAllele = filters.stream().collect(toMap( + Function.identity(), + f -> f.errorProbabilities(vc, filteringEngine, referenceContext), + (a,b) -> a, LinkedHashMap::new)); + probabilitiesByAllelesForEachFilter = probabilitiesByFilterAndAllele.entrySet().stream().collect( + groupingBy(entry -> entry.getKey().errorType(), LinkedHashMap::new, mapping(entry -> entry.getValue(), toList()))); +// probabilitiesByAllelesForEachFilter = filterByType.entrySet().stream().collect(Collectors.toMap( +// Map.Entry::getKey, +// entry -> entry.getValue().stream().map(f -> f.errorProbabilities(vc, filteringEngine, referenceContext)).collect(Collectors.toList()), +// (a,b) -> a, LinkedHashMap::new)); + probabilitiesByAllelesForEachFilter.replaceAll((k, v) -> ErrorProbabilities.transpose(v)); + + probabilitiesByTypeAndAllele = probabilitiesByAllelesForEachFilter.entrySet().stream().collect(toMap( + Map.Entry::getKey, + entry -> entry.getValue().stream().map(alleleList -> alleleList.stream().max(Double::compare).orElse(0.0)).collect(Collectors.toList()), + (a,b) -> a, HashMap::new)); - // treat errors of different types as independent - double trueProbability = 1; - for (final double errorProb : probabilitiesByType.values()) { - trueProbability *= (1 - errorProb); - } - errorProbability = Mutect2FilteringEngine.roundFinitePrecisionErrors(1 - trueProbability); + // treat errors of different types as independent + errorProbabilityByAllele = transpose(probabilitiesByTypeAndAllele.values().stream().collect(toList())) + .stream().map( + alleleProbabilities -> 
alleleProbabilities.stream().map(p -> 1.0 - p).reduce(1.0, (a, b) -> a * b)).collect(Collectors.toList()); + errorProbabilityByAllele.replaceAll(trueProb -> Mutect2FilteringEngine.roundFinitePrecisionErrors(1.0 - trueProb)); } - public double getErrorProbability() { return errorProbability; } - public double getTechnicalArtifactProbability() { return probabilitiesByType.get(ErrorType.ARTIFACT); } - public double getNonSomaticProbability() { return probabilitiesByType.get(ErrorType.NON_SOMATIC); } - public Map getProbabilitiesByFilter() { return probabilitiesByFilter; } - public Map, List> getProbabilitiesByFilterAndAllele() { return probabilitiesByFilterAndAllele; } -} + public List getErrorProbability() { return errorProbabilityByAllele; } + public List getTechnicalArtifactProbability() { return probabilitiesByTypeAndAllele.get(ErrorType.ARTIFACT); } + public List getNonSomaticProbability() { return probabilitiesByTypeAndAllele.get(ErrorType.NON_SOMATIC); } + public Map> getProbabilitiesByFilterAndAllele() { return probabilitiesByFilterAndAllele; } + + public static List> transpose(List> list) { + final int N = list.stream().mapToInt(l -> l.size()).max().orElse(-1); + List> iterList = list.stream().map(it->it.iterator()).collect(toList()); + return IntStream.range(0, N) + .mapToObj(n -> iterList.stream() + .filter(it -> it.hasNext()) + .map(m -> m.next()) + .collect(toList())) + .collect(toList()); + }} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index d9a3449f583..3fa83a86f20 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -48,7 +48,8 @@ private LinkedHashMap> combineDataByAllele(final LinkedHashMap errorProbability(final 
VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + @Override + public List errorProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { return requiredAnnotations().stream().allMatch(vc::hasAttribute) ? calculateErrorProbabilityForAlleles(vc, filteringEngine, referenceContext) : // TODO make sure that somewhere the roundFinitePrecisionErrors is called when not a hard filter diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java index 4a541cc82d7..fb869e884c5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java @@ -2,6 +2,7 @@ import htsjdk.variant.variantcontext.VariantContext; import org.apache.commons.lang3.tuple.ImmutablePair; +import org.broadinstitute.hellbender.engine.ReferenceContext; import java.util.Comparator; import java.util.List; @@ -22,7 +23,16 @@ protected void learnParametersAndClearAccumulatedData() { public abstract Optional phredScaledPosteriorAnnotationName(); protected abstract List requiredAnnotations(); - // weighted median -- what's the lowest posterior probability that accounts for samples with half of the total alt depth + /** + * + * @param vc + * @param filteringEngine + * @param referenceContext + * @return The probability that each alternate allele should be filtered out. 
This list should NOT include data for the reference allele + */ + public abstract List errorProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); + + // weighted median -- what's the lowest posterior probability that accounts for samples with half of the total alt depth protected static double weightedMedianPosteriorProbability(List> depthsAndPosteriors) { final int totalAltDepth = depthsAndPosteriors.stream().mapToInt(ImmutablePair::getLeft).sum(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java index a1929e165e0..398fdd18710 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java @@ -3,12 +3,21 @@ import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + public abstract class Mutect2VariantFilter extends Mutect2Filter { public Mutect2VariantFilter() { } - public double errorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - final double result = requiredAnnotations().stream().allMatch(vc::hasAttribute) ? calculateErrorProbability(vc, filteringEngine, referenceContext) : 0; - return Mutect2FilteringEngine.roundFinitePrecisionErrors(result); + @Override + public List errorProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + int numAltAlleles = vc.getNAlleles() - 1; + final double result = requiredAnnotations().stream().allMatch(vc::hasAttribute) ? 
calculateErrorProbability(vc, filteringEngine, referenceContext) : 0.0; + ArrayList resultList = new ArrayList<>(numAltAlleles); + Collections.fill(resultList, Mutect2FilteringEngine.roundFinitePrecisionErrors(result)); + return resultList; + } protected abstract double calculateErrorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); From eb2a590720f9ab088adac321ac3d7d43f83c7491 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 14 Jan 2020 12:41:39 -0500 Subject: [PATCH 16/85] wip - fixed almost all compile errors --- .../clustering/SomaticClusteringModel.java | 19 +-- .../mutect/filtering/ErrorProbabilities.java | 26 ++-- .../filtering/FilteringOutputStats.java | 19 ++- .../filtering/Mutect2FilteringEngine.java | 114 +++++++++++------- .../mutect/filtering/ThresholdCalculator.java | 12 +- 5 files changed, 104 insertions(+), 86 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java index 614ad63ca47..a5697d7068f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java @@ -88,19 +88,20 @@ public SomaticClusteringModel(final M2FiltersArgumentCollection MTFAC, final Lis logClusterWeights = new double[] {Math.log1p(INITIAL_HIGH_AF_WEIGHT), Math.log(INITIAL_HIGH_AF_WEIGHT)}; } - public void record(final int[] tumorADs, final double[] tumorLogOdds, final double artifactProbability, final double nonSomaticProbability, final VariantContext vc) { - // things that are definitely not somatic don't need to go in the somatic clustering model - if (artifactProbability > OBVIOUS_ARTIFACT_PROBABILITY_THRESHOLD) { - obviousArtifactCount.increment(); - return; - } 
else if (nonSomaticProbability > OBVIOUS_ARTIFACT_PROBABILITY_THRESHOLD) { - return; - } + public void record(final int[] tumorADs, final double[] tumorLogOdds, final List artifactProbabilities, final List nonSomaticProbabilities, final VariantContext vc) { final int totalAD = (int) MathUtils.sum(tumorADs); // split into one-vs-all biallelics for clustering for (int i = 0; i < tumorLogOdds.length; i++) { - data.add(new Datum(tumorLogOdds[i], artifactProbability, nonSomaticProbability, tumorADs[i+1], totalAD, indelLength(vc, i))); + // things that are definitely not somatic don't need to go in the somatic clustering model + if (artifactProbabilities.get(i) > OBVIOUS_ARTIFACT_PROBABILITY_THRESHOLD) { + obviousArtifactCount.increment(); + continue; + } else if (nonSomaticProbabilities.get(i) > OBVIOUS_ARTIFACT_PROBABILITY_THRESHOLD) { + continue; + } + + data.add(new Datum(tumorLogOdds[i], artifactProbabilities.get(i), nonSomaticProbabilities.get(i), tumorADs[i+1], totalAD, indelLength(vc, i))); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index ce620a14087..7cb06d0d5f3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -1,9 +1,7 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; -import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2; import java.util.*; import java.util.function.Function; @@ -13,10 +11,9 @@ import static java.util.stream.Collectors.*; public final class ErrorProbabilities { - private final LinkedHashMap> 
probabilitiesByFilterAndAllele; - private final LinkedHashMap>> probabilitiesByAllelesForEachFilter; - private final List errorProbabilityByAllele; + private final LinkedHashMap> alleleProbabilitiesByFilter; private final Map> probabilitiesByTypeAndAllele; + private final List combinedErrorProbabilitiesByAllele; private final int numAltAlleles; @@ -24,11 +21,11 @@ public ErrorProbabilities(final List filters, final VariantContex numAltAlleles = vc.getAlternateAlleles().size(); // EnumMap> filterByType = filters.stream() // .collect(groupingBy(Mutect2Filter::errorType, () -> new EnumMap<>(ErrorType.class), toList())); - probabilitiesByFilterAndAllele = filters.stream().collect(toMap( + alleleProbabilitiesByFilter = filters.stream().collect(toMap( Function.identity(), f -> f.errorProbabilities(vc, filteringEngine, referenceContext), (a,b) -> a, LinkedHashMap::new)); - probabilitiesByAllelesForEachFilter = probabilitiesByFilterAndAllele.entrySet().stream().collect( + LinkedHashMap>> probabilitiesByAllelesForEachFilter = alleleProbabilitiesByFilter.entrySet().stream().collect( groupingBy(entry -> entry.getKey().errorType(), LinkedHashMap::new, mapping(entry -> entry.getValue(), toList()))); // probabilitiesByAllelesForEachFilter = filterByType.entrySet().stream().collect(Collectors.toMap( // Map.Entry::getKey, @@ -43,18 +40,19 @@ public ErrorProbabilities(final List filters, final VariantContex // treat errors of different types as independent - errorProbabilityByAllele = transpose(probabilitiesByTypeAndAllele.values().stream().collect(toList())) + combinedErrorProbabilitiesByAllele = transpose(probabilitiesByTypeAndAllele.values().stream().collect(toList())) .stream().map( alleleProbabilities -> alleleProbabilities.stream().map(p -> 1.0 - p).reduce(1.0, (a, b) -> a * b)).collect(Collectors.toList()); - errorProbabilityByAllele.replaceAll(trueProb -> Mutect2FilteringEngine.roundFinitePrecisionErrors(1.0 - trueProb)); + 
combinedErrorProbabilitiesByAllele.replaceAll(trueProb -> Mutect2FilteringEngine.roundFinitePrecisionErrors(1.0 - trueProb)); } - public List getErrorProbability() { return errorProbabilityByAllele; } - public List getTechnicalArtifactProbability() { return probabilitiesByTypeAndAllele.get(ErrorType.ARTIFACT); } - public List getNonSomaticProbability() { return probabilitiesByTypeAndAllele.get(ErrorType.NON_SOMATIC); } - public Map> getProbabilitiesByFilterAndAllele() { return probabilitiesByFilterAndAllele; } + public List getCombinedErrorProbabilities() { return combinedErrorProbabilitiesByAllele; } + public List getTechnicalArtifactProbabilities() { return probabilitiesByTypeAndAllele.get(ErrorType.ARTIFACT); } + public List getNonSomaticProbabilities() { return probabilitiesByTypeAndAllele.get(ErrorType.NON_SOMATIC); } + public Map> getAlleleProbabilitiesByFilter() { return alleleProbabilitiesByFilter; } - public static List> transpose(List> list) { + // TODO would this be useful in a util class somewhere? 
+ private static List> transpose(List> list) { final int N = list.stream().mapToInt(l -> l.size()).max().orElse(-1); List> iterList = list.stream().map(it->it.iterator()).collect(toList()); return IntStream.range(0, N) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java index 5eb237105b7..4b3ff283bda 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java @@ -4,7 +4,6 @@ import org.apache.commons.lang3.mutable.MutableDouble; import org.apache.commons.lang3.tuple.Pair; -import java.io.File; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -19,19 +18,19 @@ public class FilteringOutputStats { private double FPs = 0; private double FNs = 0; - private Map filterFPs; - private Map filterFNs; + private Map filterFPs; + private Map filterFNs; - private final List filters; + private final List filters; - public FilteringOutputStats(final List filters) { + public FilteringOutputStats(final List filters) { this.filters = filters; filterFPs = makeEmptyFilterCounts(); filterFNs = makeEmptyFilterCounts(); } public void recordCall(final ErrorProbabilities errorProbabilities, final double threshold) { - final double errorProbability = errorProbabilities.getErrorProbability(); + final double errorProbability = errorProbabilities.getCombinedErrorProbabilities(); final boolean filtered = errorProbability > threshold; if (filtered) { @@ -42,16 +41,14 @@ public void recordCall(final ErrorProbabilities errorProbabilities, final double TPs += 1 - errorProbability; } - for (final Map.Entry entry : errorProbabilities.getProbabilitiesByFilter().entrySet()) { - final double filterArtifactProbability = entry.getValue(); + for (final 
Map.Entry> entry : errorProbabilities.getAlleleProbabilitiesByFilter().entrySet()) { + final List filterArtifactProbability = entry.getValue(); if (filterArtifactProbability > Mutect2FilteringEngine.EPSILON && filterArtifactProbability > threshold - Mutect2FilteringEngine.EPSILON) { filterFNs.get(entry.getKey()).add(1 - errorProbability); } else if (!filtered) { filterFPs.get(entry.getKey()).add(filterArtifactProbability); } } - - //TODO add analysis for errorProbabilities.getProbabilitiesByFilterAndAllele(); } public void writeFilteringStats(final Path filteringStats, final double threshold, List> clusteringMetadata) { @@ -66,7 +63,7 @@ public void writeFilteringStats(final Path filteringStats, final double threshol FilterStats.writeM2FilterSummary(filterStats, filteringStats, clusteringMetadata, threshold, pass, TPs, FPs, FNs); } - private Map makeEmptyFilterCounts() { + private Map makeEmptyFilterCounts() { return filters.stream().collect(Collectors.toMap(f -> f, f -> new MutableDouble(0))); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 6656f684563..a24c6bc56e2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -30,8 +30,7 @@ public class Mutect2FilteringEngine { public static final double MIN_REPORTABLE_ERROR_PROBABILITY = 0.1; - private final List filters = new ArrayList<>(); - private final List> alleleFilters = new ArrayList<>(); + private final List filters = new ArrayList<>(); private final Set normalSamples; public static final List STANDARD_MUTECT_INFO_FIELDS_FOR_FILTERING = Arrays.asList( @@ -144,22 +143,21 @@ public void accumulateData(final VariantContext vc, final ReferenceContext refer return; } 
- final ErrorProbabilities errorProbabilities = new ErrorProbabilities(filters, alleleFilters, vc, this, referenceContext); + final ErrorProbabilities errorProbabilities = new ErrorProbabilities(filters, vc, this, referenceContext); filters.forEach(f -> f.accumulateDataForLearning(vc, errorProbabilities, this)); - alleleFilters.forEach(f -> f.accumulateDataForLearning(vc, errorProbabilities, this)); final int[] tumorADs = sumADsOverSamples(vc, true, false); final double[] tumorLogOdds = Mutect2FilteringEngine.getTumorLogOdds(vc); - somaticClusteringModel.record(tumorADs, tumorLogOdds, errorProbabilities.getTechnicalArtifactProbability(), - errorProbabilities.getNonSomaticProbability(), vc); - thresholdCalculator.addArtifactProbability(errorProbabilities.getErrorProbability()); + somaticClusteringModel.record(tumorADs, tumorLogOdds, errorProbabilities.getTechnicalArtifactProbabilities(), + errorProbabilities.getNonSomaticProbabilities(), vc); + thresholdCalculator.addCombinedErrorProbabilites(errorProbabilities.getCombinedErrorProbabilities()); } /** * Refine model parameters based on data acquired in a non-final pass of {@link FilterMutectCalls} */ public void learnParameters() { - filters.forEach(Mutect2VariantFilter::learnParametersAndClearAccumulatedData); + filters.forEach(Mutect2Filter::learnParametersAndClearAccumulatedData); somaticClusteringModel.learnAndClearAccumulatedData(); thresholdCalculator.relearnThresholdAndClearAcumulatedProbabilities(); @@ -177,40 +175,65 @@ public void learnThreshold() { public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext vc, final ReferenceContext referenceContext) { final VariantContextBuilder vcb = new VariantContextBuilder(vc).filters(new HashSet<>()); - final ErrorProbabilities errorProbabilities = new ErrorProbabilities(filters, alleleFilters, vc, this, referenceContext); + final ErrorProbabilities errorProbabilities = new ErrorProbabilities(filters, vc, this, referenceContext); 
filteringOutputStats.recordCall(errorProbabilities, getThreshold() - EPSILON); - final boolean variantFailsFilters = errorProbabilities.getErrorProbability() > Math.min(1 - EPSILON, Math.max(EPSILON, getThreshold())); - final double maxErrorProb = errorProbabilities.getProbabilitiesByFilter().values().stream().mapToDouble(p->p).max().orElse(1); + // error probability must exceed threshold, and just in case threshold is bad, probabilities close to 1 must be filtered + // and probabilities close to 0 must not be filtered + double errorThreshold = Math.min(1 - EPSILON, Math.max(EPSILON, getThreshold())); - for (final Map.Entry entry : errorProbabilities.getProbabilitiesByFilter().entrySet()) { - final double errorProbability = entry.getValue(); + Map> alleleProbsByFilter = errorProbabilities.getAlleleProbabilitiesByFilter(); + Map> groups = + alleleProbsByFilter.keySet().stream().collect(Collectors.partitioningBy(f -> f.getClass().isInstance(Mutect2VariantFilter.class))); + List variantFilters = groups.get(Boolean.TRUE); + List alleleFilters = groups.get(Boolean.FALSE); - entry.getKey().phredScaledPosteriorAnnotationName().ifPresent(annotation -> { - if (entry.getKey().requiredAnnotations().stream().allMatch(vc::hasAttribute)) { + Map siteFiltersWithErrorProb = new LinkedHashMap<>(); + + // apply allele specific filters + List> ASFilters = + alleleFilters.stream() + .filter(aFilter -> !alleleProbsByFilter.get(aFilter).isEmpty()) + .map(aFilter -> addFilterStrings(alleleProbsByFilter.get(aFilter), siteFiltersWithErrorProb, errorThreshold, aFilter.filterName())).collect(Collectors.toList()); + + List orderedASFilterStrings = vc.getAlleles().stream().map(allele -> allele.isReference() || allele.isSymbolic() ? 
+ VCFConstants.EMPTY_INFO_FIELD : getMergedFilterStringForAllele(ASFilters)).collect(Collectors.toList()); + String finalAttrString = AnnotationUtils.encodeAnyASList(orderedASFilterStrings); + + vcb.putAttributes(Collections.singletonMap(GATKVCFConstants.AS_FILTER_STATUS_KEY, finalAttrString)); + + + // compute site-only filters + for (final Mutect2Filter vFilter: variantFilters) { + final List filterProbabilities = alleleProbsByFilter.get(vFilter); + if (filterProbabilities == null || filterProbabilities.isEmpty()) continue; + + // should we check to see if all probs are the same? they should be for variant filters + double errorProbability = filterProbabilities.get(0); + + vFilter.phredScaledPosteriorAnnotationName().ifPresent(annotation -> { + if (vFilter.requiredAnnotations().stream().allMatch(vc::hasAttribute)) { vcb.attribute(annotation, QualityUtils.errorProbToQual(errorProbability)); } }); - // error probability must exceed threshold, and just in case threshold is bad, probabilities close to 1 must be filtered - // and probabilities close to 0 must not be filtered - if (variantFailsFilters && errorProbability >= Math.min(maxErrorProb, MIN_REPORTABLE_ERROR_PROBABILITY)) { - vcb.filter(entry.getKey().filterName()); + if (errorProbability > errorThreshold) { + siteFiltersWithErrorProb.put(vFilter.filterName(), errorProbability); } } - // apply allele specific filters - List siteFilters = new ArrayList<>(); - List> ASFilters = - errorProbabilities.getProbabilitiesByFilterAndAllele().entrySet().stream().filter(entry -> !entry.getValue().isEmpty()).map( - entry -> addFilterStrings(entry.getValue(), siteFilters, entry.getKey().filterName())).collect(Collectors.toList()); + // TO reviewers - should there be a flag where this is skipped and all filters are in the output vcf? + // otherwise things may seem erroneous. and should we apply this type of limit on the allele specific filters too? 
- siteFilters.forEach(vcb::filter); - List orderedASFilterStrings = vc.getAlleles().stream().map(allele -> allele.isReference() || allele.isSymbolic() ? - VCFConstants.EMPTY_INFO_FIELD : getMergedFilterStringForAllele(ASFilters)).collect(Collectors.toList()); - String finalAttrString = AnnotationUtils.encodeAnyASList(orderedASFilterStrings); + // this code limits the number of filters specified for any variant to the highest probability filters + // this will not change the status of whether a variant is actually filtered or not + final double maxErrorProb = siteFiltersWithErrorProb.values().stream().mapToDouble(p->p).max().orElse(1); + siteFiltersWithErrorProb.entrySet().stream().forEach(entry -> { + if (entry.getValue() >= Math.min(maxErrorProb, MIN_REPORTABLE_ERROR_PROBABILITY)) { + vcb.filter(entry.getKey()); + } + }); - vcb.putAttributes(Collections.singletonMap(GATKVCFConstants.AS_FILTER_STATUS_KEY, finalAttrString)); return vcb.make(); } @@ -234,18 +257,17 @@ private String getMergedFilterStringForAllele(List> alleleSpeci /** * For each allele, determine whether the filter should be applied. also determine if the filter should apply to the site * @param probabilities the probability computed by the filter for the allele - * @param siteFilters output value - filter name is added if it should apply to the site + * @param siteFiltersWithErrorProb in/out parameter that is collecting site level filters with the max error probability + * @param errorThreshold the theshold to use to determine whether filter applies * @param filterName the name of the filter used in the vcf * @return Iterator of filters for an allele */ - private Iterator addFilterStrings(List probabilities, List siteFilters, String filterName) { - double thresh = getThreshold(); - double min = Math.min(1 - EPSILON, Math.max(EPSILON, thresh)); - List results = probabilities.stream().map(value -> value > Math.min(1 - EPSILON, Math.max(EPSILON, getThreshold())) ? 
+ private Iterator addFilterStrings(List probabilities, Map siteFiltersWithErrorProb, double errorThreshold, String filterName) { + List results = probabilities.stream().map(value -> value > errorThreshold ? filterName : VCFConstants.PASSES_FILTERS_v4).collect(Collectors.toList()); -// List realFilters = results.stream().filter(x -> !x.equals(VCFConstants.EMPTY_INFO_FIELD)).collect(Collectors.toList()); if (!results.isEmpty() && results.stream().allMatch(x -> x.equals(filterName))) { - siteFilters.add(filterName); + // TODO is this the correct default + siteFiltersWithErrorProb.put(filterName, probabilities.stream().max(Double::compareTo).orElse(0.0)); } return results.iterator(); } @@ -263,15 +285,15 @@ public void writeFilteringStats(final Path filteringStats) { } private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { - alleleFilters.add(new TumorEvidenceFilter()); - alleleFilters.add(new BaseQualityFilter(MTFAC.minMedianBaseQuality)); - alleleFilters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); - alleleFilters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); - filters.add(new StrandArtifactFilter()); - alleleFilters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); - filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); - alleleFilters.add(new ReadPositionFilter(MTFAC.minMedianReadPosition)); - alleleFilters.add(new MinAlleleFractionFilter(MTFAC.minAf)); + filters.add(new TumorEvidenceFilter()); + filters.add(new BaseQualityFilter(MTFAC.minMedianBaseQuality)); + filters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); + filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); + filters.add(new StrandArtifactFilter()); // convert + filters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); // test + filters.add(new 
StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); // convert + filters.add(new ReadPositionFilter(MTFAC.minMedianReadPosition)); + filters.add(new MinAlleleFractionFilter(MTFAC.minAf)); // convert to allele specific later // Normal Artifact Filter doesn't apply to mitochondria because we are not comparing @@ -294,7 +316,7 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { if (MTFAC.mitochondria) { filters.add(new ChimericOriginalAlignmentFilter(MTFAC.maxNuMTFraction)); - alleleFilters.add(new NuMTFilter(MTFAC.medianAutosomalCoverage, MTFAC.maxNuMTAutosomalCopies)); + filters.add(new NuMTFilter(MTFAC.medianAutosomalCoverage, MTFAC.maxNuMTAutosomalCopies)); } else { filters.add(new ClusteredEventsFilter(MTFAC.maxEventsInRegion)); filters.add(new MultiallelicFilter(MTFAC.numAltAllelesThreshold)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ThresholdCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ThresholdCalculator.java index 46d0098b803..c939c807fc4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ThresholdCalculator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ThresholdCalculator.java @@ -20,7 +20,7 @@ public enum Strategy { private double threshold; - final List artifactProbabilities = new ArrayList<>(); + final List errorProbabilities = new ArrayList<>(); public ThresholdCalculator(final Strategy strategy, final double initialThreshold, final double maxFalseDiscoveryRate, final double fScoreBeta) { this.strategy = strategy; @@ -29,8 +29,8 @@ public ThresholdCalculator(final Strategy strategy, final double initialThreshol this.fScoreBeta = fScoreBeta; } - public void addArtifactProbability(final double artifactProbability) { - artifactProbabilities.add(artifactProbability); + public void addCombinedErrorProbabilites(final List errorProbabilities) { + 
this.errorProbabilities.addAll(errorProbabilities); } public void relearnThresholdAndClearAcumulatedProbabilities() { @@ -38,10 +38,10 @@ public void relearnThresholdAndClearAcumulatedProbabilities() { case CONSTANT: // don't adjust break; case FALSE_DISCOVERY_RATE: - threshold = ThresholdCalculator.calculateThresholdBasedOnFalseDiscoveryRate(artifactProbabilities, maxFalseDiscoveryRate); + threshold = ThresholdCalculator.calculateThresholdBasedOnFalseDiscoveryRate(errorProbabilities, maxFalseDiscoveryRate); break; case OPTIMAL_F_SCORE: - threshold = ThresholdCalculator.calculateThresholdBasedOnOptimalFScore(artifactProbabilities, fScoreBeta); + threshold = ThresholdCalculator.calculateThresholdBasedOnOptimalFScore(errorProbabilities, fScoreBeta); break; default: throw new GATKException.ShouldNeverReachHereException("Invalid threshold strategy type: " + strategy + "."); @@ -50,7 +50,7 @@ public void relearnThresholdAndClearAcumulatedProbabilities() { } public void clear() { - artifactProbabilities.clear(); + errorProbabilities.clear(); } public double getThreshold() { From 9938a541588cb95d217369d4d34979690d4c5f72 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 14 Jan 2020 15:36:28 -0500 Subject: [PATCH 17/85] wip almost done - one more q for DB --- .../mutect/filtering/ErrorProbabilities.java | 38 ++++++++---- .../filtering/FilteredHaplotypeFilter.java | 4 +- .../filtering/FilteringOutputStats.java | 62 +++++++++++++------ .../filtering/Mutect2FilteringEngine.java | 41 +++++------- 4 files changed, 88 insertions(+), 57 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index 7cb06d0d5f3..b115f592ca6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -19,18 +19,16 @@ public final class ErrorProbabilities { public ErrorProbabilities(final List filters, final VariantContext vc, final Mutect2FilteringEngine filteringEngine, final ReferenceContext referenceContext) { numAltAlleles = vc.getAlternateAlleles().size(); -// EnumMap> filterByType = filters.stream() -// .collect(groupingBy(Mutect2Filter::errorType, () -> new EnumMap<>(ErrorType.class), toList())); - alleleProbabilitiesByFilter = filters.stream().collect(toMap( - Function.identity(), - f -> f.errorProbabilities(vc, filteringEngine, referenceContext), - (a,b) -> a, LinkedHashMap::new)); + alleleProbabilitiesByFilter = filters.stream() + .collect(toMap( + Function.identity(), + f -> f.errorProbabilities(vc, filteringEngine, referenceContext), + (a, b) -> a, LinkedHashMap::new)) + // remove filters that were not applied. i.e. returned empty list + .entrySet().stream().filter(entry -> !entry.getValue().isEmpty()) + .collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (a, b) -> a, LinkedHashMap::new)); LinkedHashMap>> probabilitiesByAllelesForEachFilter = alleleProbabilitiesByFilter.entrySet().stream().collect( groupingBy(entry -> entry.getKey().errorType(), LinkedHashMap::new, mapping(entry -> entry.getValue(), toList()))); -// probabilitiesByAllelesForEachFilter = filterByType.entrySet().stream().collect(Collectors.toMap( -// Map.Entry::getKey, -// entry -> entry.getValue().stream().map(f -> f.errorProbabilities(vc, filteringEngine, referenceContext)).collect(Collectors.toList()), -// (a,b) -> a, LinkedHashMap::new)); probabilitiesByAllelesForEachFilter.replaceAll((k, v) -> ErrorProbabilities.transpose(v)); probabilitiesByTypeAndAllele = probabilitiesByAllelesForEachFilter.entrySet().stream().collect(toMap( @@ -49,7 +47,25 @@ public ErrorProbabilities(final List filters, final VariantContex public List getCombinedErrorProbabilities() { return 
combinedErrorProbabilitiesByAllele; } public List getTechnicalArtifactProbabilities() { return probabilitiesByTypeAndAllele.get(ErrorType.ARTIFACT); } public List getNonSomaticProbabilities() { return probabilitiesByTypeAndAllele.get(ErrorType.NON_SOMATIC); } - public Map> getAlleleProbabilitiesByFilter() { return alleleProbabilitiesByFilter; } + public Map> getProbabilitiesByFilter() { return alleleProbabilitiesByFilter; } + + public Map> getProbabilitiesForAlleleFilters() { + return getPartitionedProbabilitiesByFilter(false); + } + + public Map getProbabilitiesForVariantFilters() { + return getPartitionedProbabilitiesByFilter(false).entrySet().stream() + .filter(entry -> entry.getValue() != null && !entry.getValue().isEmpty()) + .collect(toMap(entry -> entry.getKey(), entry -> entry.getValue().get(0))); + } + + private Map> getPartitionedProbabilitiesByFilter(boolean variantOnly) { + Map>> groups = + alleleProbabilitiesByFilter.entrySet().stream().collect(Collectors.partitioningBy( + entry -> entry.getKey().getClass().isInstance(Mutect2VariantFilter.class), + toMap(Map.Entry::getKey, Map.Entry::getValue, (a,b) -> a, LinkedHashMap::new))); + return groups.get(variantOnly); + } // TODO would this be useful in a util class somewhere? 
private static List> transpose(List> list) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java index 5b50df852e8..8baaa859fc5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java @@ -58,8 +58,8 @@ protected void accumulateDataForLearning(final VariantContext vc, final ErrorPro final double artifactProbability = errorProbabilities.getProbabilitiesByFilter().entrySet().stream() .filter(e -> e.getKey().errorType() != ErrorType.SEQUENCING) .filter(e -> !e.getKey().filterName().equals(filterName())) - .mapToDouble(e -> e.getValue()) - .max().orElse(0.0); + .flatMap(e -> e.getValue().stream()) + .max(Double::compareTo).orElse(0.0); for (final Genotype tumorGenotype : vc.getGenotypes()) { if (!filteringEngine.isTumor(tumorGenotype)) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java index 4b3ff283bda..a1120475124 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java @@ -3,7 +3,9 @@ import java.nio.file.Path; import org.apache.commons.lang3.mutable.MutableDouble; import org.apache.commons.lang3.tuple.Pair; +import org.broadinstitute.hellbender.utils.IndexRange; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -30,27 +32,51 @@ public FilteringOutputStats(final List filters) { } public void recordCall(final ErrorProbabilities 
errorProbabilities, final double threshold) { - final double errorProbability = errorProbabilities.getCombinedErrorProbabilities(); - final boolean filtered = errorProbability > threshold; - - if (filtered) { - FNs += 1 - errorProbability; - } else { - pass++; - FPs += errorProbability; - TPs += 1 - errorProbability; - } - - for (final Map.Entry> entry : errorProbabilities.getAlleleProbabilitiesByFilter().entrySet()) { - final List filterArtifactProbability = entry.getValue(); - if (filterArtifactProbability > Mutect2FilteringEngine.EPSILON && filterArtifactProbability > threshold - Mutect2FilteringEngine.EPSILON) { - filterFNs.get(entry.getKey()).add(1 - errorProbability); - } else if (!filtered) { - filterFPs.get(entry.getKey()).add(filterArtifactProbability); + final List probabilitiesPerAllele = errorProbabilities.getCombinedErrorProbabilities(); + final List isFiltered = probabilitiesPerAllele.stream().map(p -> p > threshold).collect(Collectors.toList()); + + probabilitiesPerAllele.stream().forEach(p -> { + if (p > threshold) { + FNs += 1.0 - p; + } else { + pass++; + FPs += p; + TPs += 1 - p; } - } + }); + + new IndexRange(0, probabilitiesPerAllele.size()).forEach(i -> { + errorProbabilities.getProbabilitiesForAlleleFilters().entrySet().stream().forEach(entry -> { + double alleleProb = entry.getValue().get(i); + if (alleleProb > Mutect2FilteringEngine.EPSILON && alleleProb > threshold - Mutect2FilteringEngine.EPSILON) { + filterFNs.get(entry.getKey()).add(1 - probabilitiesPerAllele.get(i)); + } else if (!isFiltered.get(i)) { + filterFPs.get(entry.getKey()).add(alleleProb); + } + }); + }); + + //TODO fix this for variant only filters!! 
+// for (final Map.Entry entry : errorProbabilities.getProbabilitiesForVariantFilters().entrySet()) { +// final double filterArtifactProbability = entry.getValue(); +// if (filterArtifactProbability > Mutect2FilteringEngine.EPSILON && filterArtifactProbability > threshold - Mutect2FilteringEngine.EPSILON) { +// filterFNs.get(entry.getKey()).add(1 - errorProbability); +// } else if (!filtered) { +// filterFPs.get(entry.getKey()).add(filterArtifactProbability); +// } +// } } +// for (final Map.Entry entry : errorProbabilities.getProbabilitiesByFilter().entrySet()) { +// final double filterArtifactProbability = entry.getValue(); +// if (filterArtifactProbability > Mutect2FilteringEngine.EPSILON && filterArtifactProbability > threshold - Mutect2FilteringEngine.EPSILON) { +// filterFNs.get(entry.getKey()).add(1 - errorProbability); +// } else if (!filtered) { +// filterFPs.get(entry.getKey()).add(filterArtifactProbability); +// } +// } + + public void writeFilteringStats(final Path filteringStats, final double threshold, List> clusteringMetadata) { final double totalTrueVariants = TPs + FNs; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index a24c6bc56e2..6a9b6b2a4b7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -182,19 +182,13 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext // and probabilities close to 0 must not be filtered double errorThreshold = Math.min(1 - EPSILON, Math.max(EPSILON, getThreshold())); - Map> alleleProbsByFilter = errorProbabilities.getAlleleProbabilitiesByFilter(); - Map> groups = - alleleProbsByFilter.keySet().stream().collect(Collectors.partitioningBy(f -> 
f.getClass().isInstance(Mutect2VariantFilter.class))); - List variantFilters = groups.get(Boolean.TRUE); - List alleleFilters = groups.get(Boolean.FALSE); - Map siteFiltersWithErrorProb = new LinkedHashMap<>(); // apply allele specific filters List> ASFilters = - alleleFilters.stream() - .filter(aFilter -> !alleleProbsByFilter.get(aFilter).isEmpty()) - .map(aFilter -> addFilterStrings(alleleProbsByFilter.get(aFilter), siteFiltersWithErrorProb, errorThreshold, aFilter.filterName())).collect(Collectors.toList()); + errorProbabilities.getProbabilitiesForAlleleFilters().entrySet().stream() + .filter(entry -> !entry.getValue().isEmpty()) + .map(entry -> addFilterStrings(entry.getValue(), siteFiltersWithErrorProb, errorThreshold, entry.getKey().filterName())).collect(Collectors.toList()); List orderedASFilterStrings = vc.getAlleles().stream().map(allele -> allele.isReference() || allele.isSymbolic() ? VCFConstants.EMPTY_INFO_FIELD : getMergedFilterStringForAllele(ASFilters)).collect(Collectors.toList()); @@ -204,23 +198,18 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext // compute site-only filters - for (final Mutect2Filter vFilter: variantFilters) { - final List filterProbabilities = alleleProbsByFilter.get(vFilter); - if (filterProbabilities == null || filterProbabilities.isEmpty()) continue; - - // should we check to see if all probs are the same? 
they should be for variant filters - double errorProbability = filterProbabilities.get(0); - - vFilter.phredScaledPosteriorAnnotationName().ifPresent(annotation -> { - if (vFilter.requiredAnnotations().stream().allMatch(vc::hasAttribute)) { - vcb.attribute(annotation, QualityUtils.errorProbToQual(errorProbability)); - } - }); - - if (errorProbability > errorThreshold) { - siteFiltersWithErrorProb.put(vFilter.filterName(), errorProbability); - } - } + errorProbabilities.getProbabilitiesForVariantFilters().entrySet().stream() + .forEach(entry -> { + entry.getKey().phredScaledPosteriorAnnotationName().ifPresent(annotation -> { + if (entry.getKey().requiredAnnotations().stream().allMatch(vc::hasAttribute)) { + vcb.attribute(annotation, QualityUtils.errorProbToQual(entry.getValue())); + } + }); + if (entry.getValue() > errorThreshold) { + siteFiltersWithErrorProb.put(entry.getKey().filterName(), entry.getValue()); + } + + }); // TO reviewers - should there be a flag where this is skipped and all filters are in the output vcf? // otherwise things may seem erroneous. and should we apply this type of limit on the allele specific filters too? 
From 63c414706263af4740369dac14af815499f571bd Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 14 Jan 2020 17:39:58 -0500 Subject: [PATCH 18/85] fixed some bugs --- .../walkers/mutect/filtering/ErrorProbabilities.java | 11 ++++++----- .../mutect/filtering/FilteringOutputStats.java | 1 - .../mutect/filtering/MappingQualityFilter.java | 2 +- .../mutect/filtering/MinAlleleFractionFilter.java | 2 +- .../mutect/filtering/Mutect2FilteringEngine.java | 2 +- .../mutect/filtering/Mutect2VariantFilter.java | 7 ++++--- .../tools/walkers/mutect/filtering/NuMTFilter.java | 2 +- .../tools/walkers/mutect/Mutect2IntegrationTest.java | 11 ----------- 8 files changed, 14 insertions(+), 24 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index b115f592ca6..5fa466d8cef 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -11,7 +11,7 @@ import static java.util.stream.Collectors.*; public final class ErrorProbabilities { - private final LinkedHashMap> alleleProbabilitiesByFilter; + private LinkedHashMap> alleleProbabilitiesByFilter; private final Map> probabilitiesByTypeAndAllele; private final List combinedErrorProbabilitiesByAllele; private final int numAltAlleles; @@ -23,9 +23,10 @@ public ErrorProbabilities(final List filters, final VariantContex .collect(toMap( Function.identity(), f -> f.errorProbabilities(vc, filteringEngine, referenceContext), - (a, b) -> a, LinkedHashMap::new)) + (a, b) -> a, LinkedHashMap::new)); + // remove filters that were not applied. i.e. 
returned empty list - .entrySet().stream().filter(entry -> !entry.getValue().isEmpty()) + alleleProbabilitiesByFilter = alleleProbabilitiesByFilter.entrySet().stream().filter(entry -> !entry.getValue().isEmpty()) .collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (a, b) -> a, LinkedHashMap::new)); LinkedHashMap>> probabilitiesByAllelesForEachFilter = alleleProbabilitiesByFilter.entrySet().stream().collect( groupingBy(entry -> entry.getKey().errorType(), LinkedHashMap::new, mapping(entry -> entry.getValue(), toList()))); @@ -54,7 +55,7 @@ public Map> getProbabilitiesForAlleleFilters() { } public Map getProbabilitiesForVariantFilters() { - return getPartitionedProbabilitiesByFilter(false).entrySet().stream() + return getPartitionedProbabilitiesByFilter(true).entrySet().stream() .filter(entry -> entry.getValue() != null && !entry.getValue().isEmpty()) .collect(toMap(entry -> entry.getKey(), entry -> entry.getValue().get(0))); } @@ -62,7 +63,7 @@ public Map getProbabilitiesForVariantFilters() { private Map> getPartitionedProbabilitiesByFilter(boolean variantOnly) { Map>> groups = alleleProbabilitiesByFilter.entrySet().stream().collect(Collectors.partitioningBy( - entry -> entry.getKey().getClass().isInstance(Mutect2VariantFilter.class), + entry -> Mutect2VariantFilter.class.isAssignableFrom(entry.getKey().getClass()), toMap(Map.Entry::getKey, Map.Entry::getValue, (a,b) -> a, LinkedHashMap::new))); return groups.get(variantOnly); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java index a1120475124..4019a094229 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java @@ -56,7 +56,6 @@ public void recordCall(final ErrorProbabilities 
errorProbabilities, final double }); }); - //TODO fix this for variant only filters!! // for (final Map.Entry entry : errorProbabilities.getProbabilitiesForVariantFilters().entrySet()) { // final double filterArtifactProbability = entry.getValue(); // if (filterArtifactProbability > Mutect2FilteringEngine.EPSILON && filterArtifactProbability > threshold - Mutect2FilteringEngine.EPSILON) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java index 6560a8783cd..3074ab14b0a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java @@ -31,7 +31,7 @@ public List areAllelesArtifacts(final VariantContext vc, final Mutect2F // annotation. We have to do this because the indel, even if it maps uniquely, gets a poor mapping quality // by virtue of its mismatch. The reference mapping quality is a decent proxy for the region's mappability. 
int refQual = mappingQualityByAllele.remove(0); // get the ref value and convert list to alts only - new IndexRange(0, mappingQualityByAllele.size()-1).forEach(i -> { + new IndexRange(0, mappingQualityByAllele.size()).forEach(i -> { if (indelLengths != null && indelLengths.get(i) >= longIndelSize) { mappingQualityByAllele.set(i, refQual); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java index e5942a7377d..f34ed3a2d91 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java @@ -38,7 +38,7 @@ public List getAltData(Genotype g) { public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { LinkedHashMap> dataByAllele = getAltDataByAllele(vc, checkPreconditions(), this::getAltData, filteringEngine); return dataByAllele.entrySet().stream() - .filter(entry -> !entry.getKey().isSymbolic() && !vc.getReference().equals(entry.getKey())) + .filter(entry -> /*!entry.getKey().isSymbolic() &&*/ !vc.getReference().equals(entry.getKey())) .map(entry -> entry.getValue().stream().max(Double::compare).orElse(1.0) < minAf).collect(Collectors.toList()); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 6a9b6b2a4b7..be7681c0980 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -304,7 +304,7 @@ private void 
buildFiltersList(final M2FiltersArgumentCollection MTFAC) { } if (MTFAC.mitochondria) { - filters.add(new ChimericOriginalAlignmentFilter(MTFAC.maxNuMTFraction)); + filters.add(new ChimericOriginalAlignmentFilter(MTFAC.maxNuMTFraction)); // convert!! filters.add(new NuMTFilter(MTFAC.medianAutosomalCoverage, MTFAC.maxNuMTAutosomalCopies)); } else { filters.add(new ClusteredEventsFilter(MTFAC.maxEventsInRegion)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java index 398fdd18710..4b07a6a8559 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java @@ -2,9 +2,9 @@ import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.utils.IndexRange; import java.util.ArrayList; -import java.util.Collections; import java.util.List; public abstract class Mutect2VariantFilter extends Mutect2Filter { @@ -13,9 +13,10 @@ public Mutect2VariantFilter() { } @Override public List errorProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { int numAltAlleles = vc.getNAlleles() - 1; - final double result = requiredAnnotations().stream().allMatch(vc::hasAttribute) ? calculateErrorProbability(vc, filteringEngine, referenceContext) : 0.0; + final double result = Mutect2FilteringEngine.roundFinitePrecisionErrors(requiredAnnotations().stream().allMatch(vc::hasAttribute) ? 
+ calculateErrorProbability(vc, filteringEngine, referenceContext) : 0.0); ArrayList resultList = new ArrayList<>(numAltAlleles); - Collections.fill(resultList, Mutect2FilteringEngine.roundFinitePrecisionErrors(result)); + new IndexRange(0, numAltAlleles).forEach(i -> resultList.add(result)); return resultList; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java index ee043dd4766..9c6e199ad46 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java @@ -41,7 +41,7 @@ public List getData(Genotype g) { public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { LinkedHashMap> dataByAllele = getDataByAllele(vc, checkPreconditions(), this::getData, filteringEngine); return dataByAllele.entrySet().stream() - .filter(entry -> !entry.getKey().isSymbolic() && !vc.getReference().equals(entry.getKey())) + .filter(entry -> /*!entry.getKey().isSymbolic() &&*/ !vc.getReference().equals(entry.getKey())) .map(entry -> entry.getValue().stream().max(Integer::compare).orElse(0) < maxAltDepthCutoff).collect(Collectors.toList()); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index e7df36d71b1..701486c6c3b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -579,17 +579,6 @@ public void testFilterMitochondria(File unfiltered, final double minAlleleFracti Assert.assertEquals(actualFilters, expectedFilters); } -// 
@Test -// public void testLowHetTool() throws IOException { -// final IntegrationTestSpec testSpec = new IntegrationTestSpec( -// " -R " + MITO_REF.getAbsolutePath() + -// " -V " + NA12878_MITO_FILTERED_VCF + -// " -O %s", -// Arrays.asList(TEST_OUTPUT_DIRECTORY + "expected_ExampleVariantWalkerIntegrationTest_output.txt") -// ); -// testSpec.executeTest("testExampleVariantWalker", this); -// } - @Test public void testMitochondrialRefConf() { Utils.resetRandomGenerator(); From e93445385c29693b8440aa536c2dfbbd171d99ad Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 14 Jan 2020 18:33:39 -0500 Subject: [PATCH 19/85] mito filter tests pass --- .../mutect/filtering/ContaminationFilter.java | 36 ------------------- .../mutect/filtering/ErrorProbabilities.java | 12 +++++-- .../filtering/FilteringOutputStats.java | 20 +---------- .../mutect/filtering/Mutect2AlleleFilter.java | 2 +- .../mutect/Mutect2IntegrationTest.java | 4 +-- 5 files changed, 13 insertions(+), 61 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java index bc465f67b5c..84d2dc7c761 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java @@ -80,42 +80,6 @@ public List calculateErrorProbabilityForAlleles(final VariantContext vc, return depthsAndPosteriorsPerAllele.stream().map(alleleData -> alleleData.isEmpty() ? 
Double.NaN : weightedMedianPosteriorProbability(alleleData)).collect(Collectors.toList()); } -// @Override - public double calculateErrorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - final List> depthsAndPosteriors = new ArrayList<>(); - - for (final Genotype tumorGenotype : vc.getGenotypes()) { - if (filteringEngine.isNormal(tumorGenotype)) { - continue; - } - - final double contaminationFromFile = contaminationBySample.getOrDefault(tumorGenotype.getSampleName(), defaultContamination); - final double contamination = Math.max(0, Math.min(contaminationFromFile, 1 - EPSILON)); // handle file with contamination == 1 - final double[] alleleFractions = VariantContextGetters.getAttributeAsDoubleArray(tumorGenotype, VCFConstants.ALLELE_FREQUENCY_KEY, - () -> new double[] {1.0}, 1.0); - final int maxFractionIndex = MathUtils.maxElementIndex(alleleFractions); - final int[] ADs = tumorGenotype.getAD(); - final int altCount = ADs[maxFractionIndex + 1]; // AD is all alleles, while AF is alts only, hence the +1 offset - final int depth = (int) MathUtils.sum(ADs); - final double[] negativeLog10AlleleFrequencies = VariantContextGetters.getAttributeAsDoubleArray(vc, - GATKVCFConstants.POPULATION_AF_KEY, () -> new double[]{Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY}, Double.POSITIVE_INFINITY); - final double alleleFrequency = MathUtils.applyToArray(negativeLog10AlleleFrequencies, x -> Math.pow(10,-x))[maxFractionIndex]; - - final double logSomaticLikelihood = filteringEngine.getSomaticClusteringModel().logLikelihoodGivenSomatic(depth, altCount); - - final double singleContaminantLikelihood = 2 * alleleFrequency * (1 - alleleFrequency) * MathUtils.binomialProbability(depth, altCount, contamination /2) - + MathUtils.square(alleleFrequency) * MathUtils.binomialProbability(depth, altCount, contamination); - final double manyContaminantLikelihood = MathUtils.binomialProbability(depth, altCount, 
contamination * alleleFrequency); - final double logContaminantLikelihood = Math.log(Math.max(singleContaminantLikelihood, manyContaminantLikelihood)); - final double logOddsOfRealVsContamination = logSomaticLikelihood - logContaminantLikelihood; - final double posteriorProbOfContamination = filteringEngine.posteriorProbabilityOfError(vc, logOddsOfRealVsContamination, maxFractionIndex); - - depthsAndPosteriors.add(ImmutablePair.of(altCount, posteriorProbOfContamination)); - } - - return weightedMedianPosteriorProbability(depthsAndPosteriors); - } - @Override public String filterName() { return GATKVCFConstants.CONTAMINATION_FILTER_NAME; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index 5fa466d8cef..14e7af3b45c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -23,11 +23,17 @@ public ErrorProbabilities(final List filters, final VariantContex .collect(toMap( Function.identity(), f -> f.errorProbabilities(vc, filteringEngine, referenceContext), - (a, b) -> a, LinkedHashMap::new)); - + (a, b) -> a, LinkedHashMap::new)) // remove filters that were not applied. i.e. returned empty list - alleleProbabilitiesByFilter = alleleProbabilitiesByFilter.entrySet().stream().filter(entry -> !entry.getValue().isEmpty()) + .entrySet().stream().filter(entry -> !entry.getValue().isEmpty()) .collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (a, b) -> a, LinkedHashMap::new)); + + // if vc has symbolic allele, remove it + if (vc.hasSymbolicAlleles()) { + // can we assume it's the last allele? 
+ int symIndex = numAltAlleles - 1; + alleleProbabilitiesByFilter.values().stream().forEach(probList -> probList.remove(symIndex)); + } LinkedHashMap>> probabilitiesByAllelesForEachFilter = alleleProbabilitiesByFilter.entrySet().stream().collect( groupingBy(entry -> entry.getKey().errorType(), LinkedHashMap::new, mapping(entry -> entry.getValue(), toList()))); probabilitiesByAllelesForEachFilter.replaceAll((k, v) -> ErrorProbabilities.transpose(v)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java index 4019a094229..37efe00235d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteringOutputStats.java @@ -46,7 +46,7 @@ public void recordCall(final ErrorProbabilities errorProbabilities, final double }); new IndexRange(0, probabilitiesPerAllele.size()).forEach(i -> { - errorProbabilities.getProbabilitiesForAlleleFilters().entrySet().stream().forEach(entry -> { + errorProbabilities.getProbabilitiesByFilter().entrySet().stream().forEach(entry -> { double alleleProb = entry.getValue().get(i); if (alleleProb > Mutect2FilteringEngine.EPSILON && alleleProb > threshold - Mutect2FilteringEngine.EPSILON) { filterFNs.get(entry.getKey()).add(1 - probabilitiesPerAllele.get(i)); @@ -56,26 +56,8 @@ public void recordCall(final ErrorProbabilities errorProbabilities, final double }); }); -// for (final Map.Entry entry : errorProbabilities.getProbabilitiesForVariantFilters().entrySet()) { -// final double filterArtifactProbability = entry.getValue(); -// if (filterArtifactProbability > Mutect2FilteringEngine.EPSILON && filterArtifactProbability > threshold - Mutect2FilteringEngine.EPSILON) { -// filterFNs.get(entry.getKey()).add(1 - errorProbability); -// } else if 
(!filtered) { -// filterFPs.get(entry.getKey()).add(filterArtifactProbability); -// } -// } } -// for (final Map.Entry entry : errorProbabilities.getProbabilitiesByFilter().entrySet()) { -// final double filterArtifactProbability = entry.getValue(); -// if (filterArtifactProbability > Mutect2FilteringEngine.EPSILON && filterArtifactProbability > threshold - Mutect2FilteringEngine.EPSILON) { -// filterFNs.get(entry.getKey()).add(1 - errorProbability); -// } else if (!filtered) { -// filterFPs.get(entry.getKey()).add(filterArtifactProbability); -// } -// } - - public void writeFilteringStats(final Path filteringStats, final double threshold, List> clusteringMetadata) { final double totalTrueVariants = TPs + FNs; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index 3fa83a86f20..91e62e06ecd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -46,7 +46,7 @@ private LinkedHashMap> combineDataByAllele(final LinkedHashMap errorProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 701486c6c3b..f4342bacc5d 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -533,7 +533,7 @@ public Object[][] vcfsForFiltering() { ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, 
GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME), ImmutableSet.of(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME), Collections.emptySet(), - ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, //GATKVCFConstants.CONTAMINATION_FILTER_NAME, + ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.CONTAMINATION_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME)), Arrays.asList( @@ -541,7 +541,7 @@ public Object[][] vcfsForFiltering() { Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual|.", Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // ".|weak_evidence, possible_numt|.", Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|PASS|weak_evidence, base_qual, low_allele_frac|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME /*+ ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME*/ + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // ".|weak_evidence, base_qual, map_qual, contamination, position, low_allele_frac, 
possible_numt|." + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // ".|weak_evidence, base_qual, map_qual, contamination, position, low_allele_frac, possible_numt|." )} }; } From 5d756078beb068bfbae3b580b85892a15ad377ff Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Wed, 15 Jan 2020 17:19:14 -0500 Subject: [PATCH 20/85] add AS_SB_Table as mutect2 annotation --- .../AS_StrandBiasMutectAnnotation.java | 44 ++++++ .../allelespecific/AS_StrandBiasTest.java | 137 +--------------- .../allelespecific/StrandBiasUtils.java | 146 ++++++++++++++++++ .../filtering/Mutect2FilteringEngine.java | 4 +- .../utils/variant/GATKVCFHeaderLines.java | 7 +- 5 files changed, 198 insertions(+), 140 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java new file mode 100644 index 00000000000..22b5b1def43 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java @@ -0,0 +1,44 @@ +package org.broadinstitute.hellbender.tools.walkers.annotator; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFInfoHeaderLine; +import 
org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_StrandBiasTest; +import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AlleleSpecificAnnotationData; +import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils; +import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; +import org.broadinstitute.hellbender.utils.read.GATKRead; +import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class AS_StrandBiasMutectAnnotation extends InfoFieldAnnotation implements StandardMutectAnnotation { + private final static Logger logger = LogManager.getLogger(StrandBiasBySample.class); + @Override + public Map annotate(ReferenceContext ref, VariantContext vc, AlleleLikelihoods likelihoods) { + Utils.nonNull(vc); + + if ( likelihoods == null ) { + logger.warn("Annotation will not be calculated, alleleLikelihoodMap is null"); + return null; + } + + return StrandBiasUtils.computeSBAnnotation(vc, likelihoods, GATKVCFConstants.AS_SB_TABLE_KEY); + } + + @Override + public List getDescriptions() { + return super.getDescriptions(); + } + + @Override + public List getKeyNames() { + return Collections.singletonList(GATKVCFConstants.AS_SB_TABLE_KEY); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_StrandBiasTest.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_StrandBiasTest.java index 979dc49d122..569730c0a98 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_StrandBiasTest.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_StrandBiasTest.java @@ -26,12 +26,6 @@ public abstract class AS_StrandBiasTest extends StrandBiasTest implements Reduci public static final double MIN_PVALUE = 1.0E-320; public static final int FORWARD = 0; public static final int REVERSE = 1; - private final List ZERO_LIST = new ArrayList<>(); - - public AS_StrandBiasTest(){ - ZERO_LIST.add(0,0); - ZERO_LIST.add(1,0); - } @Override public String getPrimaryRawKey() { return GATKVCFConstants.AS_SB_TABLE_KEY; } @@ -82,30 +76,7 @@ public Map annotateRawData(final ReferenceContext ref, if ( likelihoods == null) { return Collections.emptyMap(); } - // calculate the annotation from the likelihoods - // likelihoods can come from HaplotypeCaller call to VariantAnnotatorEngine - final Map annotations = new HashMap<>(); - final ReducibleAnnotationData> myData = new AlleleSpecificAnnotationData<>(vc.getAlleles(),null); - getStrandCountsFromLikelihoodMap(vc, likelihoods, myData, MIN_COUNT); - final Map> perAlleleValues = myData.getAttributeMap(); - final String annotationString = makeRawAnnotationString(vc.getAlleles(), perAlleleValues); - annotations.put(getPrimaryRawKey(), annotationString); - return annotations; - } - - protected String makeRawAnnotationString(final List vcAlleles, final Map> perAlleleValues) { - String annotationString = ""; - for (final Allele a : vcAlleles) { - if (!annotationString.isEmpty()) { - annotationString += PRINT_DELIM; - } - List alleleValues = perAlleleValues.get(a); - if (alleleValues == null) { - alleleValues = ZERO_LIST; - } - annotationString += encode(alleleValues); - } - return annotationString; + return StrandBiasUtils.computeSBAnnotation(vc, likelihoods, getPrimaryRawKey()); } protected String makeReducedAnnotationString(VariantContext vc, Map perAltsStrandCounts) { @@ -139,23 +110,12 @@ public Map combineRawData(final List vcAlleles, final Li for (final ReducibleAnnotationData currentValue : 
annotationList) { parseRawDataString(currentValue); - combineAttributeMap(currentValue, combinedData); + StrandBiasUtils.combineAttributeMap(currentValue, combinedData); } - final String annotationString = makeRawAnnotationString(vcAlleles, combinedData.getAttributeMap()); + final String annotationString = StrandBiasUtils.makeRawAnnotationString(vcAlleles, combinedData.getAttributeMap()); return Collections.singletonMap(getPrimaryRawKey(), annotationString); } - protected String encode(List alleleValues) { - String annotationString = ""; - for (int j =0; j < alleleValues.size(); j++) { - annotationString += alleleValues.get(j); - if (j < alleleValues.size()-1) { - annotationString += ","; - } - } - return annotationString; - } - /** * Parses the raw data stings of combined contingency matrix data and calls client methods calculateReducedData(myData) * implementation to generate double digest of provided allele information which is stored in '|' delineated lists. @@ -179,7 +139,7 @@ public Map finalizeRawData(final VariantContext vc, final Varia Map perAltRankSumResults = calculateReducedData(myData); String annotationString = makeReducedAnnotationString(vc, perAltRankSumResults); - String rawAnnotationsString = makeRawAnnotationString(vc.getAlleles(), myData.getAttributeMap()); + String rawAnnotationsString = StrandBiasUtils.makeRawAnnotationString(vc.getAlleles(), myData.getAttributeMap()); Map returnMap = new HashMap<>(); returnMap.put(getKeyNames().get(0), annotationString); returnMap.put(getPrimaryRawKey(), rawAnnotationsString); //this is in case raw annotations are requested @@ -209,95 +169,6 @@ protected void parseRawDataString(ReducibleAnnotationData> myData) myData.setAttributeMap(perAlleleValues); } - /** - Allocate and fill a 2x2 strand contingency table. 
In the end, it'll look something like this: - * fw rc - * allele1 # # - * allele2 # # - * @return a 2x2 contingency table - */ - public void getStrandCountsFromLikelihoodMap( final VariantContext vc, - final AlleleLikelihoods likelihoods, - final ReducibleAnnotationData> perAlleleValues, - final int minCount) { - if( likelihoods == null || vc == null ) { - return; - } - - final Allele ref = vc.getReference(); - final List allAlts = vc.getAlternateAlleles(); - - for (final String sample : likelihoods.samples()) { - final ReducibleAnnotationData> sampleTable = new AlleleSpecificAnnotationData<>(vc.getAlleles(),null); - likelihoods.bestAllelesBreakingTies(sample).stream() - .filter(ba -> ba.isInformative()) - .forEach(ba -> updateTable(ba.allele, ba.evidence, ref, allAlts, sampleTable)); - if (passesMinimumThreshold(sampleTable, minCount)) { - combineAttributeMap(sampleTable, perAlleleValues); - } - } - } - - - protected void combineAttributeMap(final ReducibleAnnotationData> toAdd, final ReducibleAnnotationData> combined) { - for (final Allele a : combined.getAlleles()) { - if (toAdd.hasAttribute(a) && toAdd.getAttribute(a) != null) { - if (combined.getAttribute(a) != null) { - combined.getAttribute(a).set(FORWARD, (int) combined.getAttribute(a).get(FORWARD) + (int) toAdd.getAttribute(a).get(FORWARD)); - combined.getAttribute(a).set(REVERSE, (int) combined.getAttribute(a).get(REVERSE) + (int) toAdd.getAttribute(a).get(REVERSE)); - } - else { - List alleleData = new ArrayList<>(); - alleleData.add(FORWARD, toAdd.getAttribute(a).get(FORWARD)); - alleleData.add(REVERSE, toAdd.getAttribute(a).get(REVERSE)); - combined.putAttribute(a,alleleData); - } - } - } - } - - private void updateTable(final Allele bestAllele, final GATKRead read, final Allele ref, final List allAlts, final ReducibleAnnotationData> perAlleleValues) { - - final boolean matchesRef = bestAllele.equals(ref, true); - final boolean matchesAnyAlt = allAlts.contains(bestAllele); - - //can happen if a read's 
most likely allele has been removed when --max_alternate_alleles is exceeded - if (!( matchesRef || matchesAnyAlt )) { - return; - } - - final List alleleStrandCounts; - if (perAlleleValues.hasAttribute(bestAllele) && perAlleleValues.getAttribute(bestAllele) != null) { - alleleStrandCounts = perAlleleValues.getAttribute(bestAllele); - } else { - alleleStrandCounts = new ArrayList<>(); - alleleStrandCounts.add(0,0); - alleleStrandCounts.add(1,0); - } - final boolean isForward = !read.isReverseStrand(); - if (isForward) { - alleleStrandCounts.set(FORWARD, alleleStrandCounts.get(FORWARD) + 1); - } else { - alleleStrandCounts.set(REVERSE, alleleStrandCounts.get(REVERSE) + 1); - } - perAlleleValues.putAttribute(bestAllele, alleleStrandCounts); - } - - /** - * Does this strand data array pass the minimum threshold for inclusion? - * - * @param sampleTable the per-allele fwd/rev read counts for a single sample - * @param minCount The minimum threshold of counts in the array - * @return true if it passes the minimum threshold, false otherwise - */ - protected boolean passesMinimumThreshold(final ReducibleAnnotationData> sampleTable, final int minCount) { - final int readCount = sampleTable.getAttributeMap().values().stream() - .filter(alleleValues -> alleleValues != null) - .mapToInt(alleleValues -> alleleValues.get(FORWARD) + alleleValues.get(REVERSE)) - .sum(); - return readCount > minCount; - } - @Override //Allele-specific annotations cannot be called from walkers other than HaplotypeCaller protected Map calculateAnnotationFromGTfield(final GenotypesContext genotypes){ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java new file mode 100644 index 00000000000..f1b028ae17a --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java @@ -0,0 
+1,146 @@ +package org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; +import org.broadinstitute.hellbender.utils.read.GATKRead; + +import java.util.*; + +public class StrandBiasUtils { + public static final int FORWARD = 0; + public static final int REVERSE = 1; + public static final int MIN_COUNT = 2; + public static final String PRINT_DELIM = "|"; + private static final List ZERO_LIST = new ArrayList<>(Arrays.asList(0,0)); + + public static Map computeSBAnnotation(VariantContext vc, AlleleLikelihoods likelihoods, String key) { + // calculate the annotation from the likelihoods + // likelihoods can come from HaplotypeCaller call to VariantAnnotatorEngine + final Map annotations = new HashMap<>(); + final ReducibleAnnotationData> myData = new AlleleSpecificAnnotationData<>(vc.getAlleles(),null); + getStrandCountsFromLikelihoodMap(vc, likelihoods, myData, MIN_COUNT); + final Map> perAlleleValues = myData.getAttributeMap(); + final String annotationString = makeRawAnnotationString(vc.getAlleles(), perAlleleValues); + annotations.put(key, annotationString); + return annotations; + } + + protected static String makeRawAnnotationString(final List vcAlleles, final Map> perAlleleValues) { + String annotationString = ""; + for (final Allele a : vcAlleles) { + if (!annotationString.isEmpty()) { + annotationString += PRINT_DELIM; + } + List alleleValues = perAlleleValues.get(a); + if (alleleValues == null) { + alleleValues = ZERO_LIST; + } + annotationString += encode(alleleValues); + } + return annotationString; + } + + protected static String encode(List alleleValues) { + String annotationString = ""; + for (int j =0; j < alleleValues.size(); j++) { + annotationString += alleleValues.get(j); + if (j < alleleValues.size()-1) { + annotationString += ","; + } + } + return 
annotationString; + } + + + /** + Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this: + * fw rc + * allele1 # # + * allele2 # # + * @return a 2x2 contingency table + */ + public static void getStrandCountsFromLikelihoodMap( final VariantContext vc, + final AlleleLikelihoods likelihoods, + final ReducibleAnnotationData> perAlleleValues, + final int minCount) { + if( likelihoods == null || vc == null ) { + return; + } + + final Allele ref = vc.getReference(); + final List allAlts = vc.getAlternateAlleles(); + + for (final String sample : likelihoods.samples()) { + final ReducibleAnnotationData> sampleTable = new AlleleSpecificAnnotationData<>(vc.getAlleles(),null); + likelihoods.bestAllelesBreakingTies(sample).stream() + .filter(ba -> ba.isInformative()) + .forEach(ba -> updateTable(ba.allele, ba.evidence, ref, allAlts, sampleTable)); + if (passesMinimumThreshold(sampleTable, minCount)) { + combineAttributeMap(sampleTable, perAlleleValues); + } + } + } + + protected static void combineAttributeMap(final ReducibleAnnotationData> toAdd, final ReducibleAnnotationData> combined) { + for (final Allele a : combined.getAlleles()) { + if (toAdd.hasAttribute(a) && toAdd.getAttribute(a) != null) { + if (combined.getAttribute(a) != null) { + combined.getAttribute(a).set(FORWARD, (int) combined.getAttribute(a).get(FORWARD) + (int) toAdd.getAttribute(a).get(FORWARD)); + combined.getAttribute(a).set(REVERSE, (int) combined.getAttribute(a).get(REVERSE) + (int) toAdd.getAttribute(a).get(REVERSE)); + } + else { + List alleleData = new ArrayList<>(); + alleleData.add(FORWARD, toAdd.getAttribute(a).get(FORWARD)); + alleleData.add(REVERSE, toAdd.getAttribute(a).get(REVERSE)); + combined.putAttribute(a,alleleData); + } + } + } + } + + private static void updateTable(final Allele bestAllele, final GATKRead read, final Allele ref, final List allAlts, final ReducibleAnnotationData> perAlleleValues) { + + final boolean matchesRef = 
bestAllele.equals(ref, true); + final boolean matchesAnyAlt = allAlts.contains(bestAllele); + + //can happen if a read's most likely allele has been removed when --max_alternate_alleles is exceeded + if (!( matchesRef || matchesAnyAlt )) { + return; + } + + final List alleleStrandCounts; + if (perAlleleValues.hasAttribute(bestAllele) && perAlleleValues.getAttribute(bestAllele) != null) { + alleleStrandCounts = perAlleleValues.getAttribute(bestAllele); + } else { + alleleStrandCounts = new ArrayList<>(); + alleleStrandCounts.add(0,0); + alleleStrandCounts.add(1,0); + } + final boolean isForward = !read.isReverseStrand(); + if (isForward) { + alleleStrandCounts.set(FORWARD, alleleStrandCounts.get(FORWARD) + 1); + } else { + alleleStrandCounts.set(REVERSE, alleleStrandCounts.get(REVERSE) + 1); + } + perAlleleValues.putAttribute(bestAllele, alleleStrandCounts); + } + + /** + * Does this strand data array pass the minimum threshold for inclusion? + * + * @param sampleTable the per-allele fwd/rev read counts for a single sample + * @param minCount The minimum threshold of counts in the array + * @return true if it passes the minimum threshold, false otherwise + */ + protected static boolean passesMinimumThreshold(final ReducibleAnnotationData> sampleTable, final int minCount) { + final int readCount = sampleTable.getAttributeMap().values().stream() + .filter(alleleValues -> alleleValues != null) + .mapToInt(alleleValues -> alleleValues.get(FORWARD) + alleleValues.get(REVERSE)) + .sum(); + return readCount > minCount; + } + + + +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index be7681c0980..4ce28a0d53a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -279,7 +279,7 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { filters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); filters.add(new StrandArtifactFilter()); // convert - filters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); // test + filters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); // convert filters.add(new ReadPositionFilter(MTFAC.minMedianReadPosition)); filters.add(new MinAlleleFractionFilter(MTFAC.minAf)); @@ -290,7 +290,7 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { filters.add(new NormalArtifactFilter(MTFAC.normalPileupPValueThreshold)); filters.add(new NRatioFilter(MTFAC.nRatio)); - // filters that don't apply to specific alleles + // filters that don't apply to specific alleles but can still be converted filters.add(new PanelOfNormalsFilter()); if (!MTFAC.readOrientationPriorTarGzs.isEmpty()) { diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java index 5db25e67391..a27c7951a74 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java @@ -1,11 +1,7 @@ package org.broadinstitute.hellbender.utils.variant; import htsjdk.variant.vcf.*; -import org.broadinstitute.hellbender.tools.walkers.annotator.BaseQuality; -import org.broadinstitute.hellbender.tools.walkers.annotator.FragmentLength; -import org.broadinstitute.hellbender.tools.walkers.annotator.MappingQuality; -import 
org.broadinstitute.hellbender.tools.walkers.annotator.ReadPosition; -import org.broadinstitute.hellbender.tools.walkers.annotator.RMSMappingQuality; +import org.broadinstitute.hellbender.tools.walkers.annotator.*; import org.broadinstitute.hellbender.utils.Utils; import java.util.*; @@ -211,6 +207,7 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf addInfoLine(new FragmentLength().getDescriptions().get(0)); addInfoLine(new MappingQuality().getDescriptions().get(0)); addInfoLine(new ReadPosition().getDescriptions().get(0)); + addInfoLine(new AS_StrandBiasMutectAnnotation().getDescriptions().get(0)); addInfoLine(new VCFInfoHeaderLine(UNITIG_SIZES_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Sizes of reassembled unitigs")); addInfoLine(new VCFInfoHeaderLine(JOINT_ALIGNMENT_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of joint alignments")); addInfoLine(new VCFInfoHeaderLine(ALIGNMENT_SCORE_DIFFERENCE_KEY, 1, VCFHeaderLineType.Integer, "Difference in alignment score between best and next-best alignment")); From 1e3595220073cb91bc8397027b6c3643793c7807 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Wed, 15 Jan 2020 18:05:33 -0500 Subject: [PATCH 21/85] made strict strand bias allele specific --- .../mutect/filtering/Mutect2AlleleFilter.java | 1 + .../filtering/StrictStrandBiasFilter.java | 71 +++++-------------- 2 files changed, 17 insertions(+), 55 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index 91e62e06ecd..24a7f438b7b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -57,5 +57,6 @@ public List errorProbabilities(final VariantContext vc, final Mutect2Fil 
Collections.emptyList(); } + // returning an empty list means filter is not evaluated protected abstract List calculateErrorProbabilityForAlleles(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java index d0904f4045f..42436497454 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java @@ -1,22 +1,16 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; -import org.apache.commons.lang3.mutable.MutableInt; import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.tools.walkers.annotator.StrandBiasBySample; +import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; -import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashMap; import java.util.List; -import java.util.function.Predicate; import java.util.stream.Collectors; -public class StrictStrandBiasFilter extends HardFilter { //HardAlleleFilter { +public class StrictStrandBiasFilter extends HardAlleleFilter> { private final int minReadsOnEachStrand; public StrictStrandBiasFilter(final int minReadsOnEachStrand) { @@ -26,59 +20,26 @@ public StrictStrandBiasFilter(final int minReadsOnEachStrand) { @Override public ErrorType errorType() { return ErrorType.ARTIFACT; } - public Predicate 
checkPreconditions() { - return g -> g.hasExtendedAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY); - } - - public List getData(Genotype g) { - int[] data = GATKProtectedVariantContextUtils.getAttributeAsIntArray(g, GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, () -> null, 0); - return Arrays.stream(data).boxed().collect(Collectors.toList()); - } - -// @Override -// public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { -// final MutableInt altForwardCount = new MutableInt(0); -// final MutableInt altReverseCount = new MutableInt(0); - -// LinkedHashMap> dataByAllele = getDataByAllele(vc, checkPreconditions(), this::getData, filteringEngine); -// return dataByAllele.entrySet().stream() -// .filter(entry -> !entry.getKey().isSymbolic() && !vc.getReference().equals(entry.getKey())) -// .map(entry -> minReadsOnEachStrand > 0 && entry.getValue().stream().min(Integer::compare).orElse(0) < minReadsOnEachStrand).collect(Collectors.toList()); - - -// vc.getGenotypes().stream().filter(filteringEngine::isTumor) -// .filter(g -> g.hasExtendedAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) -// .forEach(g -> { -// final int[] strandBiasCounts = GATKProtectedVariantContextUtils.getAttributeAsIntArray(g, GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, () -> null, 0); -// altForwardCount.add(StrandBiasBySample.getAltForwardCountFromFlattenedContingencyTable(strandBiasCounts)); -// altReverseCount.add(StrandBiasBySample.getAltReverseCountFromFlattenedContingencyTable(strandBiasCounts)); -// }); -// -// // filter if there is no alt evidence in the forward or reverse strand -// return Math.min(altForwardCount.getValue(), altReverseCount.getValue()) < minReadsOnEachStrand; -//} - @Override - public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) { - if (minReadsOnEachStrand == 0) { - return false; + public List areAllelesArtifacts(final VariantContext 
vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + String sbStr = vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_SB_TABLE_KEY, null); + if (sbStr == null) { + return Collections.emptyList(); } - final MutableInt altForwardCount = new MutableInt(0); - final MutableInt altReverseCount = new MutableInt(0); + List alleleSBs = AnnotationUtils.decodeAnyASListWithPrintDelim(sbStr); + if (alleleSBs.size() <= 1) { + return Collections.emptyList(); + } - vc.getGenotypes().stream().filter(filteringEngine::isTumor) - .filter(g -> g.hasExtendedAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) - .forEach(g -> { - final int[] strandBiasCounts = VariantContextGetters.getAttributeAsIntArray(g, GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, () -> null, 0); - altForwardCount.add(StrandBiasBySample.getAltForwardCountFromFlattenedContingencyTable(strandBiasCounts)); - altReverseCount.add(StrandBiasBySample.getAltReverseCountFromFlattenedContingencyTable(strandBiasCounts)); - }); + // skip the reference + List> sbs = alleleSBs.subList(1, alleleSBs.size()).stream().map( + asb -> AnnotationUtils.decodeAnyASList(asb).stream() + .mapToInt(Integer::parseInt).boxed().collect(Collectors.toList())).collect(Collectors.toList()); - // filter if there is no alt evidence in the forward or reverse strand - return Math.min(altForwardCount.getValue(), altReverseCount.getValue()) < minReadsOnEachStrand; - } + return sbs.stream().map(altList -> altList.stream().anyMatch(x -> x == 0)).collect(Collectors.toList()); + } @Override public String filterName() { From f347db5796c589d4e7d834fe2c1401f794016555 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 16 Jan 2020 15:45:40 -0500 Subject: [PATCH 22/85] add test data for AS_SB_TABLE in vcf. 
format not compatible with annotation in gvcf --- .../filtering/StrictStrandBiasFilter.java | 13 ++---- .../mutect/Mutect2IntegrationTest.java | 10 +++-- .../mutect/mito/unfiltered-with-assb.vcf | 45 +++++++++++++++++++ .../mito/unfiltered-with-assb.vcf.stats | 2 + 4 files changed, 57 insertions(+), 13 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf.stats diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java index 42436497454..084749f2f83 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java @@ -22,19 +22,14 @@ public StrictStrandBiasFilter(final int minReadsOnEachStrand) { @Override public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - String sbStr = vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_SB_TABLE_KEY, null); - if (sbStr == null) { - return Collections.emptyList(); - } - - List alleleSBs = AnnotationUtils.decodeAnyASListWithPrintDelim(sbStr); - if (alleleSBs.size() <= 1) { + List sbStr = vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_SB_TABLE_KEY, null); + if (sbStr == null || sbStr.size() <= 1) { return Collections.emptyList(); } // skip the reference - List> sbs = alleleSBs.subList(1, alleleSBs.size()).stream().map( - asb -> AnnotationUtils.decodeAnyASList(asb).stream() + List> sbs = sbStr.subList(1, sbStr.size()).stream().map( + asb -> AnnotationUtils.decodeAnyASListWithPrintDelim(asb).stream() 
.mapToInt(Integer::parseInt).boxed().collect(Collectors.toList())).collect(Collectors.toList()); return sbs.stream().map(altList -> altList.stream().anyMatch(x -> x == 0)).collect(Collectors.toList()); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index f4342bacc5d..37cceaa04c3 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -75,7 +75,9 @@ public class Mutect2IntegrationTest extends CommandLineProgramTest { private static final File TEN_PCT_CONTAMINATION_TABLE = new File(toolsTestDir, "mutect/ten-pct-contamination.table"); private static final File NA12878_MITO_BAM = new File(toolsTestDir, "mutect/mito/NA12878.bam"); - private static final File NA12878_MITO_VCF = new File(toolsTestDir, "mutect/mito/unfiltered.vcf"); + private static final File NA12878_MITO_VCF = new File(toolsTestDir, "mutect/mito/unfiltered-with-assb.vcf"); +// private static final File NA12878_MITO_VCF = new File(toolsTestDir, "mutect/mito/unfiltered.vcf"); +// private static final File NA12878_MITO_GVCF = new File(toolsTestDir, "mutect/mito/unfiltered-assb.g.vcf"); private static final File NA12878_MITO_GVCF = new File(toolsTestDir, "mitochondria/NA12878.MT.g.vcf"); private static final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta"); private static final File DEEP_MITO_BAM = new File(largeFileTestDir, "mutect/highDPMTsnippet.bam"); @@ -512,7 +514,7 @@ public void testMitochondria() { public Object[][] vcfsForFiltering() { return new Object[][]{ {NA12878_MITO_VCF, 0.5, 30, Collections.emptyList(), Arrays.asList( - Collections.emptySet(), + ImmutableSet.of(GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), 
ImmutableSet.of(GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), ImmutableSet.of( GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, @@ -521,10 +523,10 @@ public Object[][] vcfsForFiltering() { Collections.emptySet(), Collections.emptySet()), Arrays.asList( - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strict_stand Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, low_allele_frac, possible_numt - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, low_allele_frac, possible_numt|low_allele_frac, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, low_allele_frac, possible_numt|strict_strand, low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4) // .|PASS )}, diff --git 
a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf new file mode 100644 index 00000000000..299db9d1364 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf @@ -0,0 +1,45 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##Mutect Version=2.1 +##contig= +##filtering_status=Warning: unfiltered Mutect2 calls. Please run FilterMutectCalls to remove false positives. +##source=Mutect2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chrM 152 . T C . . AS_SB_TABLE=0|3,1556|0;DP=1582;ECNT=1;TLOD=5266.19;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +chrM 263 . A G . . AS_SB_TABLE=1|0,431|400;DP=858;ECNT=4;TLOD=2641.72;POPAF=5.000e-08;OCM=800 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +chrM 301 . A AC . . AS_SB_TABLE=500|79,25|28;DP=680;ECNT=4;TLOD=3.32;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +chrM 302 . A AC,C,ACC . . AS_SB_TABLE=0|5,200|201,30|37,0|49;DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 310 . T TC . . 
AS_SB_TABLE=0|0,200|458;DP=705;ECNT=4;TLOD=1974.89;POPAF=5.000e-08;RPA=5,6;RU=C;STR;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 750 . A G . . AS_SB_TABLE=0|1,700|824;DP=1568;ECNT=1;TLOD=5097.90;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf.stats b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf.stats new file mode 100644 index 00000000000..80d434628be --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf.stats @@ -0,0 +1,2 @@ +statistic value +callable 16000 From c7efe6e8b59b998582c7a5fbb11c55fe0d7cb920 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 16 Jan 2020 17:09:14 -0500 Subject: [PATCH 23/85] convert stand artifact - bug with prior list --- .../allelespecific/StrandBiasUtils.java | 13 ++ .../filtering/StrandArtifactFilter.java | 118 ++++++++++-------- .../filtering/StrictStrandBiasFilter.java | 13 +- 3 files changed, 86 insertions(+), 58 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java index f1b028ae17a..5656e5b877a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java @@ -2,10 +2,14 @@ import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.hellbender.engine.filters.VariantFilter; +import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import 
org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; import org.broadinstitute.hellbender.utils.read.GATKRead; +import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import java.util.*; +import java.util.stream.Collectors; public class StrandBiasUtils { public static final int FORWARD = 0; @@ -142,5 +146,14 @@ protected static boolean passesMinimumThreshold(final ReducibleAnnotationData
  • > getSBsForAlleles(VariantContext vc) { + List sbStr = vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_SB_TABLE_KEY, null); + if (sbStr == null || sbStr.isEmpty()) { + return Collections.emptyList(); + } + return sbStr.stream().map( + asb -> AnnotationUtils.decodeAnyASListWithPrintDelim(asb).stream() + .mapToInt(Integer::parseInt).boxed().collect(Collectors.toList())).collect(Collectors.toList()); + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java index 3c7e021e280..09e71380d14 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java @@ -4,7 +4,9 @@ import htsjdk.variant.variantcontext.VariantContext; import org.apache.commons.math3.util.CombinatoricsUtils; import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils; import org.broadinstitute.hellbender.tools.walkers.validation.basicshortmutpileup.BetaBinomialDistribution; +import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.OptimizationUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; @@ -13,7 +15,7 @@ import java.util.function.DoubleUnaryOperator; import java.util.stream.Collectors; -public class StrandArtifactFilter extends Mutect2VariantFilter { +public class StrandArtifactFilter extends Mutect2AlleleFilter { // beta prior on strand bias allele fraction private double INITIAL_ALPHA_STRAND = 1.0; private double INITIAL_BETA_STRAND = 20.0; @@ -31,85 +33,103 @@ public class StrandArtifactFilter extends Mutect2VariantFilter { private static final 
double INITIAL_STRAND_ARTIFACT_PRIOR = 0.001; - private double strandArtifactPrior = INITIAL_STRAND_ARTIFACT_PRIOR; + private List alleleStrandArtifactPriors; private static final double ARTIFACT_PSEUDOCOUNT = 1; private static final double NON_ARTIFACT_PSEUDOCOUNT = 1000; - private final List eSteps = new ArrayList<>(); + private final List> eStepsForAltAlleles = new ArrayList<>(); @Override public ErrorType errorType() { return ErrorType.ARTIFACT; } @Override - public double calculateErrorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - final EStep probabilities = calculateArtifactProbabilities(vc, filteringEngine); - return probabilities.forwardArtifactResponsibility + probabilities.reverseArtifactResponsibility; + public List calculateErrorProbabilityForAlleles(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + final List alleleProbs = calculateArtifactProbabilities(vc, filteringEngine); + return alleleProbs.isEmpty() ? 
Collections.emptyList() : + alleleProbs.stream().map(probabilities -> probabilities.forwardArtifactResponsibility + probabilities.reverseArtifactResponsibility).collect(Collectors.toList()); } - public EStep calculateArtifactProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) { - // {fwd ref, rev ref, fwd alt, rev alt} - final int[] counts = filteringEngine.sumStrandCountsOverSamples(vc, true, false); - - final int indelSize = Math.abs(vc.getReference().length() - vc.getAlternateAllele(0).length()); - if (counts[2] + counts[3] == 0 || indelSize > LONGEST_STRAND_ARTIFACT_INDEL_SIZE) { - return new EStep(0, 0, counts[0] + counts[2], counts[1] + counts[3], counts[2], counts[3]); + public List calculateArtifactProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) { + // for each allele, forward and reverse count + List> sbs = StrandBiasUtils.getSBsForAlleles(vc); + if (sbs == null || sbs.isEmpty() || sbs.size() <= 1) { + return Collections.emptyList(); } + if (alleleStrandArtifactPriors == null) { + alleleStrandArtifactPriors = Collections.nCopies(sbs.size()-1, INITIAL_STRAND_ARTIFACT_PRIOR); + } - return strandArtifactProbability(strandArtifactPrior, counts[0] + counts[2], counts[1] + counts[3], counts[2], counts[3], indelSize); - + final ListIterator indelSizeIterator = vc.getAlternateAlleles().stream().map(alt -> Math.abs(vc.getReference().length() - alt.length())).collect(Collectors.toList()).listIterator(); + final ListIterator priorIterator = alleleStrandArtifactPriors.listIterator(); + int refFwd = sbs.get(0).get(0); + int refRev = sbs.get(0).get(1); + // skip the reference + List> altSBs = sbs.subList(1, sbs.size()); + + return altSBs.stream().map(altSB -> { + final int altIndelSize = indelSizeIterator.next(); + final double altPrior = priorIterator.next(); + if (altSB.stream().mapToInt(Integer::intValue).sum() == 0 || altIndelSize > LONGEST_STRAND_ARTIFACT_INDEL_SIZE) { + return new EStep(0, 0, 
refFwd + altSB.get(0), refRev + altSB.get(1), altSB.get(0), altSB.get(1)); + } else { + return new EStep(altPrior, refFwd + altSB.get(0), refRev + altSB.get(1), altSB.get(0), altSB.get(1), altIndelSize); + } + }).collect(Collectors.toList()); } @Override protected void accumulateDataForLearning(final VariantContext vc, final ErrorProbabilities errorProbabilities, final Mutect2FilteringEngine filteringEngine) { if (requiredAnnotations().stream().allMatch(vc::hasAttribute)) { - final EStep eStep = calculateArtifactProbabilities(vc, filteringEngine); - eSteps.add(eStep); + final ListIterator eStepsIterator = calculateArtifactProbabilities(vc, filteringEngine).listIterator(); + eStepsForAltAlleles.stream().forEach(step -> step.add(eStepsIterator.next())); } } @Override protected void clearAccumulatedData() { - eSteps.clear(); + eStepsForAltAlleles.clear(); } @Override protected void learnParameters() { - final List potentialArtifacts = eSteps.stream() - .filter(eStep -> eStep.getArtifactProbability() > 0.1).collect(Collectors.toList()); - final double totalArtifacts = potentialArtifacts.stream().mapToDouble(EStep::getArtifactProbability).sum(); - final double totalNonArtifacts = eSteps.stream().mapToDouble(e -> 1 - e.getArtifactProbability()).sum(); - strandArtifactPrior = (totalArtifacts + ARTIFACT_PSEUDOCOUNT) / (totalArtifacts + ARTIFACT_PSEUDOCOUNT + totalNonArtifacts + NON_ARTIFACT_PSEUDOCOUNT); - - - final double artifactAltCount = potentialArtifacts.stream() - .mapToDouble(e -> e.forwardArtifactResponsibility * e.forwardAltCount + e.reverseArtifactResponsibility * e.reverseAltCount) - .sum(); - - final double artifactDepth = potentialArtifacts.stream() - .mapToDouble(e -> e.forwardArtifactResponsibility * e.forwardCount + e.reverseArtifactResponsibility * e.reverseCount) - .sum(); - - final double artifactBetaMean = (artifactAltCount + INITIAL_ALPHA_STRAND) / (artifactDepth + INITIAL_ALPHA_STRAND + INITIAL_BETA_STRAND); - - // We do the M step for the beta prior 
on the artifact allele fraction by brute force single-parameter optimization. - // By estimating the mean empirically as above we can fix mean = alpha / (alpha + beta), hence beta = (1/mean - 1) * alpha. - // This lets us do single-parameter optimization on alpha with beta/alpha fixed. - // brute force optimization is fairly cheap because the objective includes only calls that show some evidence of strand bias. - final DoubleUnaryOperator objective = alpha -> { - final double beta = (1 / artifactBetaMean - 1) * alpha; - return potentialArtifacts.stream() - .mapToDouble( e -> e.getForwardArtifactResponsibility() * artifactStrandLogLikelihood(e.forwardCount, e.forwardAltCount, alpha, beta) - + e.getReverseArtifactResponsibility() * artifactStrandLogLikelihood(e.reverseCount, e.reverseAltCount, alpha, beta)) + new IndexRange(0, eStepsForAltAlleles.size()).forEach(i -> { + List alleleESteps = eStepsForAltAlleles.get(i); + final List potentialArtifacts = alleleESteps.stream() + .filter(eStep -> eStep.getArtifactProbability() > 0.1).collect(Collectors.toList()); + final double totalArtifacts = potentialArtifacts.stream().mapToDouble(EStep::getArtifactProbability).sum(); + final double totalNonArtifacts = alleleESteps.stream().mapToDouble(e -> 1 - e.getArtifactProbability()).sum(); + alleleStrandArtifactPriors.set(i, (totalArtifacts + ARTIFACT_PSEUDOCOUNT) / (totalArtifacts + ARTIFACT_PSEUDOCOUNT + totalNonArtifacts + NON_ARTIFACT_PSEUDOCOUNT)); + + final double artifactAltCount = potentialArtifacts.stream() + .mapToDouble(e -> e.forwardArtifactResponsibility * e.forwardAltCount + e.reverseArtifactResponsibility * e.reverseAltCount) + .sum(); + + final double artifactDepth = potentialArtifacts.stream() + .mapToDouble(e -> e.forwardArtifactResponsibility * e.forwardCount + e.reverseArtifactResponsibility * e.reverseCount) .sum(); - }; - alphaStrand = OptimizationUtils.max(objective, 0.01, 100, INITIAL_ALPHA_STRAND, 0.01, 0.01, 100).getPoint(); - betaStrand = 
(1/artifactBetaMean - 1)*alphaStrand; - // free up memory - eSteps.clear(); + final double artifactBetaMean = (artifactAltCount + INITIAL_ALPHA_STRAND) / (artifactDepth + INITIAL_ALPHA_STRAND + INITIAL_BETA_STRAND); + + // We do the M step for the beta prior on the artifact allele fraction by brute force single-parameter optimization. + // By estimating the mean empirically as above we can fix mean = alpha / (alpha + beta), hence beta = (1/mean - 1) * alpha. + // This lets us do single-parameter optimization on alpha with beta/alpha fixed. + // brute force optimization is fairly cheap because the objective includes only calls that show some evidence of strand bias. + final DoubleUnaryOperator objective = alpha -> { + final double beta = (1 / artifactBetaMean - 1) * alpha; + return potentialArtifacts.stream() + .mapToDouble( e -> e.getForwardArtifactResponsibility() * artifactStrandLogLikelihood(e.forwardCount, e.forwardAltCount, alpha, beta) + + e.getReverseArtifactResponsibility() * artifactStrandLogLikelihood(e.reverseCount, e.reverseAltCount, alpha, beta)) + .sum(); + }; + + alphaStrand = OptimizationUtils.max(objective, 0.01, 100, INITIAL_ALPHA_STRAND, 0.01, 0.01, 100).getPoint(); + betaStrand = (1/artifactBetaMean - 1)*alphaStrand; + // free up memory + alleleESteps.clear(); + }); } @VisibleForTesting diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java index 084749f2f83..8622f829e6e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java @@ -3,6 +3,7 @@ import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; import 
org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; +import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; @@ -22,18 +23,12 @@ public StrictStrandBiasFilter(final int minReadsOnEachStrand) { @Override public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - List sbStr = vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_SB_TABLE_KEY, null); - if (sbStr == null || sbStr.size() <= 1) { + List> sbs = StrandBiasUtils.getSBsForAlleles(vc); + if (sbs == null || sbs.isEmpty() || sbs.size() <= 1) { return Collections.emptyList(); } - // skip the reference - List> sbs = sbStr.subList(1, sbStr.size()).stream().map( - asb -> AnnotationUtils.decodeAnyASListWithPrintDelim(asb).stream() - .mapToInt(Integer::parseInt).boxed().collect(Collectors.toList())).collect(Collectors.toList()); - - return sbs.stream().map(altList -> altList.stream().anyMatch(x -> x == 0)).collect(Collectors.toList()); - + return sbs.subList(1, sbs.size()).stream().map(altList -> altList.stream().anyMatch(x -> x == 0)).collect(Collectors.toList()); } @Override From 4344277d6ec1a6e2a5e87b226ee976397d88da21 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 16 Jan 2020 17:47:17 -0500 Subject: [PATCH 24/85] fixed strand artifact - fix and verify tests --- .../filtering/StrandArtifactFilter.java | 83 +++++++++---------- .../mutect/Mutect2IntegrationTest.java | 8 +- 2 files changed, 41 insertions(+), 50 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java index 09e71380d14..de65a1b9cbd 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java @@ -6,7 +6,6 @@ import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils; import org.broadinstitute.hellbender.tools.walkers.validation.basicshortmutpileup.BetaBinomialDistribution; -import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.OptimizationUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; @@ -33,13 +32,13 @@ public class StrandArtifactFilter extends Mutect2AlleleFilter { private static final double INITIAL_STRAND_ARTIFACT_PRIOR = 0.001; - private List alleleStrandArtifactPriors; + private double strandArtifactPrior = INITIAL_STRAND_ARTIFACT_PRIOR; private static final double ARTIFACT_PSEUDOCOUNT = 1; private static final double NON_ARTIFACT_PSEUDOCOUNT = 1000; - private final List> eStepsForAltAlleles = new ArrayList<>(); + private final List eSteps = new ArrayList<>(); @Override public ErrorType errorType() { return ErrorType.ARTIFACT; } @@ -58,12 +57,7 @@ public List calculateArtifactProbabilities(final VariantContext vc, final return Collections.emptyList(); } - if (alleleStrandArtifactPriors == null) { - alleleStrandArtifactPriors = Collections.nCopies(sbs.size()-1, INITIAL_STRAND_ARTIFACT_PRIOR); - } - final ListIterator indelSizeIterator = vc.getAlternateAlleles().stream().map(alt -> Math.abs(vc.getReference().length() - alt.length())).collect(Collectors.toList()).listIterator(); - final ListIterator priorIterator = alleleStrandArtifactPriors.listIterator(); int refFwd = sbs.get(0).get(0); int refRev = sbs.get(0).get(1); // skip the reference @@ -71,11 +65,10 @@ public List calculateArtifactProbabilities(final VariantContext 
vc, final return altSBs.stream().map(altSB -> { final int altIndelSize = indelSizeIterator.next(); - final double altPrior = priorIterator.next(); if (altSB.stream().mapToInt(Integer::intValue).sum() == 0 || altIndelSize > LONGEST_STRAND_ARTIFACT_INDEL_SIZE) { return new EStep(0, 0, refFwd + altSB.get(0), refRev + altSB.get(1), altSB.get(0), altSB.get(1)); } else { - return new EStep(altPrior, refFwd + altSB.get(0), refRev + altSB.get(1), altSB.get(0), altSB.get(1), altIndelSize); + return new EStep(strandArtifactPrior, refFwd + altSB.get(0), refRev + altSB.get(1), altSB.get(0), altSB.get(1), altIndelSize); } }).collect(Collectors.toList()); } @@ -83,53 +76,51 @@ public List calculateArtifactProbabilities(final VariantContext vc, final @Override protected void accumulateDataForLearning(final VariantContext vc, final ErrorProbabilities errorProbabilities, final Mutect2FilteringEngine filteringEngine) { if (requiredAnnotations().stream().allMatch(vc::hasAttribute)) { - final ListIterator eStepsIterator = calculateArtifactProbabilities(vc, filteringEngine).listIterator(); - eStepsForAltAlleles.stream().forEach(step -> step.add(eStepsIterator.next())); + final List altESteps = calculateArtifactProbabilities(vc, filteringEngine); + eSteps.addAll(altESteps); } } @Override protected void clearAccumulatedData() { - eStepsForAltAlleles.clear(); + eSteps.clear(); } @Override protected void learnParameters() { - new IndexRange(0, eStepsForAltAlleles.size()).forEach(i -> { - List alleleESteps = eStepsForAltAlleles.get(i); - final List potentialArtifacts = alleleESteps.stream() - .filter(eStep -> eStep.getArtifactProbability() > 0.1).collect(Collectors.toList()); - final double totalArtifacts = potentialArtifacts.stream().mapToDouble(EStep::getArtifactProbability).sum(); - final double totalNonArtifacts = alleleESteps.stream().mapToDouble(e -> 1 - e.getArtifactProbability()).sum(); - alleleStrandArtifactPriors.set(i, (totalArtifacts + ARTIFACT_PSEUDOCOUNT) / (totalArtifacts + 
ARTIFACT_PSEUDOCOUNT + totalNonArtifacts + NON_ARTIFACT_PSEUDOCOUNT)); - - final double artifactAltCount = potentialArtifacts.stream() - .mapToDouble(e -> e.forwardArtifactResponsibility * e.forwardAltCount + e.reverseArtifactResponsibility * e.reverseAltCount) - .sum(); - - final double artifactDepth = potentialArtifacts.stream() - .mapToDouble(e -> e.forwardArtifactResponsibility * e.forwardCount + e.reverseArtifactResponsibility * e.reverseCount) + final List potentialArtifacts = eSteps.stream() + .filter(eStep -> eStep.getArtifactProbability() > 0.1).collect(Collectors.toList()); + final double totalArtifacts = potentialArtifacts.stream().mapToDouble(EStep::getArtifactProbability).sum(); + final double totalNonArtifacts = eSteps.stream().mapToDouble(e -> 1 - e.getArtifactProbability()).sum(); + strandArtifactPrior = (totalArtifacts + ARTIFACT_PSEUDOCOUNT) / (totalArtifacts + ARTIFACT_PSEUDOCOUNT + totalNonArtifacts + NON_ARTIFACT_PSEUDOCOUNT); + + + final double artifactAltCount = potentialArtifacts.stream() + .mapToDouble(e -> e.forwardArtifactResponsibility * e.forwardAltCount + e.reverseArtifactResponsibility * e.reverseAltCount) + .sum(); + + final double artifactDepth = potentialArtifacts.stream() + .mapToDouble(e -> e.forwardArtifactResponsibility * e.forwardCount + e.reverseArtifactResponsibility * e.reverseCount) + .sum(); + + final double artifactBetaMean = (artifactAltCount + INITIAL_ALPHA_STRAND) / (artifactDepth + INITIAL_ALPHA_STRAND + INITIAL_BETA_STRAND); + + // We do the M step for the beta prior on the artifact allele fraction by brute force single-parameter optimization. + // By estimating the mean empirically as above we can fix mean = alpha / (alpha + beta), hence beta = (1/mean - 1) * alpha. + // This lets us do single-parameter optimization on alpha with beta/alpha fixed. + // brute force optimization is fairly cheap because the objective includes only calls that show some evidence of strand bias. 
+ final DoubleUnaryOperator objective = alpha -> { + final double beta = (1 / artifactBetaMean - 1) * alpha; + return potentialArtifacts.stream() + .mapToDouble( e -> e.getForwardArtifactResponsibility() * artifactStrandLogLikelihood(e.forwardCount, e.forwardAltCount, alpha, beta) + + e.getReverseArtifactResponsibility() * artifactStrandLogLikelihood(e.reverseCount, e.reverseAltCount, alpha, beta)) .sum(); + }; - final double artifactBetaMean = (artifactAltCount + INITIAL_ALPHA_STRAND) / (artifactDepth + INITIAL_ALPHA_STRAND + INITIAL_BETA_STRAND); - - // We do the M step for the beta prior on the artifact allele fraction by brute force single-parameter optimization. - // By estimating the mean empirically as above we can fix mean = alpha / (alpha + beta), hence beta = (1/mean - 1) * alpha. - // This lets us do single-parameter optimization on alpha with beta/alpha fixed. - // brute force optimization is fairly cheap because the objective includes only calls that show some evidence of strand bias. 
- final DoubleUnaryOperator objective = alpha -> { - final double beta = (1 / artifactBetaMean - 1) * alpha; - return potentialArtifacts.stream() - .mapToDouble( e -> e.getForwardArtifactResponsibility() * artifactStrandLogLikelihood(e.forwardCount, e.forwardAltCount, alpha, beta) - + e.getReverseArtifactResponsibility() * artifactStrandLogLikelihood(e.reverseCount, e.reverseAltCount, alpha, beta)) - .sum(); - }; - - alphaStrand = OptimizationUtils.max(objective, 0.01, 100, INITIAL_ALPHA_STRAND, 0.01, 0.01, 100).getPoint(); - betaStrand = (1/artifactBetaMean - 1)*alphaStrand; - // free up memory - alleleESteps.clear(); - }); + alphaStrand = OptimizationUtils.max(objective, 0.01, 100, INITIAL_ALPHA_STRAND, 0.01, 0.01, 100).getPoint(); + betaStrand = (1/artifactBetaMean - 1)*alphaStrand; + // free up memory + eSteps.clear(); } @VisibleForTesting diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 37cceaa04c3..9cd65daaa86 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -514,7 +514,7 @@ public void testMitochondria() { public Object[][] vcfsForFiltering() { return new Object[][]{ {NA12878_MITO_VCF, 0.5, 30, Collections.emptyList(), Arrays.asList( - ImmutableSet.of(GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), + ImmutableSet.of(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), ImmutableSet.of(GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), ImmutableSet.of( GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, @@ -523,10 +523,10 @@ public Object[][] vcfsForFiltering() { Collections.emptySet(), Collections.emptySet()), Arrays.asList( - 
Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strict_stand - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strand_bias, strict_stand + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME), // .|PASS Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, low_allele_frac, possible_numt - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, low_allele_frac, possible_numt|strict_strand, low_allele_frac, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4) // .|PASS )}, From 
f11171f505dd5052ed3e91cd029f2791a8835ef4 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 17 Jan 2020 13:37:52 -0500 Subject: [PATCH 25/85] fix bug in strand artifact. still need tests --- .../mutect/filtering/Mutect2FilteringEngine.java | 4 ++-- .../walkers/mutect/filtering/StrandArtifactFilter.java | 2 +- .../tools/walkers/mutect/Mutect2IntegrationTest.java | 8 ++++---- .../tools/mutect/mito/unfiltered-with-assb.vcf | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 4ce28a0d53a..1b2178e032b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -278,9 +278,9 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { filters.add(new BaseQualityFilter(MTFAC.minMedianBaseQuality)); filters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); - filters.add(new StrandArtifactFilter()); // convert + filters.add(new StrandArtifactFilter()); // debug filters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); - filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); // convert + filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); // test gvcf filters.add(new ReadPositionFilter(MTFAC.minMedianReadPosition)); filters.add(new MinAlleleFractionFilter(MTFAC.minAf)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java 
index de65a1b9cbd..54e86f691f2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java @@ -68,7 +68,7 @@ public List calculateArtifactProbabilities(final VariantContext vc, final if (altSB.stream().mapToInt(Integer::intValue).sum() == 0 || altIndelSize > LONGEST_STRAND_ARTIFACT_INDEL_SIZE) { return new EStep(0, 0, refFwd + altSB.get(0), refRev + altSB.get(1), altSB.get(0), altSB.get(1)); } else { - return new EStep(strandArtifactPrior, refFwd + altSB.get(0), refRev + altSB.get(1), altSB.get(0), altSB.get(1), altIndelSize); + return strandArtifactProbability(strandArtifactPrior, refFwd + altSB.get(0), refRev + altSB.get(1), altSB.get(0), altSB.get(1), altIndelSize); } }).collect(Collectors.toList()); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 9cd65daaa86..7579355418a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -514,7 +514,7 @@ public void testMitochondria() { public Object[][] vcfsForFiltering() { return new Object[][]{ {NA12878_MITO_VCF, 0.5, 30, Collections.emptyList(), Arrays.asList( - ImmutableSet.of(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), + ImmutableSet.of(GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), ImmutableSet.of(GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), ImmutableSet.of( GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, @@ -523,10 +523,10 @@ public Object[][] vcfsForFiltering() { Collections.emptySet(), Collections.emptySet()), Arrays.asList( - 
Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strand_bias, strict_stand - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME), // .|PASS + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD,GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strict_stand + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, low_allele_frac, possible_numt - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME /*GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME*/ + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, low_allele_frac, possible_numt|strict_strand, low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4) // 
.|PASS )}, diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf index 299db9d1364..fc17979364c 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf @@ -37,9 +37,9 @@ ##filtering_status=Warning: unfiltered Mutect2 calls. Please run FilterMutectCalls to remove false positives. ##source=Mutect2 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chrM 152 . T C . . AS_SB_TABLE=0|3,1556|0;DP=1582;ECNT=1;TLOD=5266.19;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +chrM 152 . T C . . AS_SB_TABLE=1|2,0|1556;DP=1582;ECNT=1;TLOD=5266.19;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true chrM 263 . A G . . AS_SB_TABLE=1|0,431|400;DP=858;ECNT=4;TLOD=2641.72;POPAF=5.000e-08;OCM=800 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 -chrM 301 . A AC . . AS_SB_TABLE=500|79,25|28;DP=680;ECNT=4;TLOD=3.32;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 -chrM 302 . A AC,C,ACC . . AS_SB_TABLE=0|5,200|201,30|37,0|49;DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 -chrM 310 . T TC . . AS_SB_TABLE=0|0,200|458;DP=705;ECNT=4;TLOD=1974.89;POPAF=5.000e-08;RPA=5,6;RU=C;STR;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 -chrM 750 . A G . . 
AS_SB_TABLE=0|1,700|824;DP=1568;ECNT=1;TLOD=5097.90;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true +chrM 301 . A AC . . AS_SB_TABLE=500|149,41|12;DP=680;ECNT=4;TLOD=3.32;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +chrM 302 . A AC,C,ACC . . AS_SB_TABLE=0|5,1|400,30|37,0|49;DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 310 . T TC . . AS_SB_TABLE=0|0,300|358;DP=705;ECNT=4;TLOD=1974.89;POPAF=5.000e-08;RPA=5,6;RU=C;STR;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 750 . A G . . AS_SB_TABLE=0|1,100|1424;DP=1568;ECNT=1;TLOD=5097.90;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true From 72d87ab804cbb2242bec1b6116b3751630ae0d77 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 17 Jan 2020 14:50:15 -0500 Subject: [PATCH 26/85] strand artifact - use total from all alleles --- .../walkers/mutect/filtering/StrandArtifactFilter.java | 8 ++++---- .../tools/walkers/mutect/Mutect2IntegrationTest.java | 4 ++-- .../hellbender/tools/mutect/mito/unfiltered-with-assb.vcf | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java index 54e86f691f2..149232c2886 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java @@ 
-58,17 +58,17 @@ public List calculateArtifactProbabilities(final VariantContext vc, final } final ListIterator indelSizeIterator = vc.getAlternateAlleles().stream().map(alt -> Math.abs(vc.getReference().length() - alt.length())).collect(Collectors.toList()).listIterator(); - int refFwd = sbs.get(0).get(0); - int refRev = sbs.get(0).get(1); + int totalFwd = sbs.stream().map(sb -> sb.get(0)).reduce(0, Math::addExact); + int totalRev = sbs.stream().map(sb -> sb.get(1)).reduce(0, Math::addExact); // skip the reference List> altSBs = sbs.subList(1, sbs.size()); return altSBs.stream().map(altSB -> { final int altIndelSize = indelSizeIterator.next(); if (altSB.stream().mapToInt(Integer::intValue).sum() == 0 || altIndelSize > LONGEST_STRAND_ARTIFACT_INDEL_SIZE) { - return new EStep(0, 0, refFwd + altSB.get(0), refRev + altSB.get(1), altSB.get(0), altSB.get(1)); + return new EStep(0, 0, totalFwd, totalRev, altSB.get(0), altSB.get(1)); } else { - return strandArtifactProbability(strandArtifactPrior, refFwd + altSB.get(0), refRev + altSB.get(1), altSB.get(0), altSB.get(1), altIndelSize); + return strandArtifactProbability(strandArtifactPrior, totalFwd, totalRev, altSB.get(0), altSB.get(1), altIndelSize); } }).collect(Collectors.toList()); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 7579355418a..c38a8038b42 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -523,10 +523,10 @@ public Object[][] vcfsForFiltering() { Collections.emptySet(), Collections.emptySet()), Arrays.asList( - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD,GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strict_stand + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, 
/*GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " +*/ GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strand_bias, strict_stand Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, low_allele_frac, possible_numt - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME /*GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME*/ + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, low_allele_frac, possible_numt|strict_strand, low_allele_frac, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, strand_bias, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4) // .|PASS )}, diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf 
b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf index fc17979364c..5a267f502d9 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf @@ -37,9 +37,9 @@ ##filtering_status=Warning: unfiltered Mutect2 calls. Please run FilterMutectCalls to remove false positives. ##source=Mutect2 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chrM 152 . T C . . AS_SB_TABLE=1|2,0|1556;DP=1582;ECNT=1;TLOD=5266.19;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +chrM 152 . T C . . AS_SB_TABLE=0|3,0|1556;DP=1582;ECNT=1;TLOD=5266.19;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true chrM 263 . A G . . AS_SB_TABLE=1|0,431|400;DP=858;ECNT=4;TLOD=2641.72;POPAF=5.000e-08;OCM=800 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 chrM 301 . A AC . . AS_SB_TABLE=500|149,41|12;DP=680;ECNT=4;TLOD=3.32;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 -chrM 302 . A AC,C,ACC . . AS_SB_TABLE=0|5,1|400,30|37,0|49;DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 302 . A AC,C,ACC . . AS_SB_TABLE=0|5,51|350,60|7,49|0;DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 chrM 310 . T TC . . 
AS_SB_TABLE=0|0,300|358;DP=705;ECNT=4;TLOD=1974.89;POPAF=5.000e-08;RPA=5,6;RU=C;STR;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 chrM 750 . A G . . AS_SB_TABLE=0|1,100|1424;DP=1568;ECNT=1;TLOD=5097.90;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true From 6bf1ecfbd4148c22837d28e968cffad1e9d98583 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 17 Jan 2020 15:05:58 -0500 Subject: [PATCH 27/85] put AS_SB_TABLE back into gvcf format --- .../annotator/allelespecific/StrandBiasUtils.java | 9 +++++---- .../tools/mutect/mito/unfiltered-with-assb.vcf | 12 ++++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java index 5656e5b877a..1e6a8afaf06 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java @@ -147,13 +147,14 @@ protected static boolean passesMinimumThreshold(final ReducibleAnnotationData
  • > getSBsForAlleles(VariantContext vc) { - List sbStr = vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_SB_TABLE_KEY, null); + String sbStr = vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_SB_TABLE_KEY, null); if (sbStr == null || sbStr.isEmpty()) { return Collections.emptyList(); } - return sbStr.stream().map( - asb -> AnnotationUtils.decodeAnyASListWithPrintDelim(asb).stream() - .mapToInt(Integer::parseInt).boxed().collect(Collectors.toList())).collect(Collectors.toList()); + List asb = AnnotationUtils.decodeAnyASListWithPrintDelim(sbStr); + return asb.stream() + .map(fwdrev -> AnnotationUtils.decodeAnyASList(fwdrev).stream().map(String::trim) + .mapToInt(Integer::parseInt).boxed().collect(Collectors.toList())).collect(Collectors.toList()); } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf index 5a267f502d9..e93973ca0c7 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf @@ -37,9 +37,9 @@ ##filtering_status=Warning: unfiltered Mutect2 calls. Please run FilterMutectCalls to remove false positives. ##source=Mutect2 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chrM 152 . T C . . AS_SB_TABLE=0|3,0|1556;DP=1582;ECNT=1;TLOD=5266.19;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true -chrM 263 . A G . . AS_SB_TABLE=1|0,431|400;DP=858;ECNT=4;TLOD=2641.72;POPAF=5.000e-08;OCM=800 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 -chrM 301 . A AC . . 
AS_SB_TABLE=500|149,41|12;DP=680;ECNT=4;TLOD=3.32;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 -chrM 302 . A AC,C,ACC . . AS_SB_TABLE=0|5,51|350,60|7,49|0;DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 -chrM 310 . T TC . . AS_SB_TABLE=0|0,300|358;DP=705;ECNT=4;TLOD=1974.89;POPAF=5.000e-08;RPA=5,6;RU=C;STR;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 -chrM 750 . A G . . AS_SB_TABLE=0|1,100|1424;DP=1568;ECNT=1;TLOD=5097.90;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true +chrM 152 . T C . . AS_SB_TABLE=0,3|0,1556;DP=1582;ECNT=1;TLOD=5266.19;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +chrM 263 . A G . . AS_SB_TABLE=1,0|431,400;DP=858;ECNT=4;TLOD=2641.72;POPAF=5.000e-08;OCM=800 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +chrM 301 . A AC . . AS_SB_TABLE=500,149|41,12;DP=680;ECNT=4;TLOD=3.32;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +chrM 302 . A AC,C,ACC . . AS_SB_TABLE=0,5|51,350|60,7|49,0;DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 310 . T TC . . AS_SB_TABLE=0,0|300,358;DP=705;ECNT=4;TLOD=1974.89;POPAF=5.000e-08;RPA=5,6;RU=C;STR;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 750 . A G . . 
AS_SB_TABLE=0,1|100,1424;DP=1568;ECNT=1;TLOD=5097.90;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true From 781064acd987da270153f9ad205ad087e408821c Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 21 Jan 2020 13:08:02 -0500 Subject: [PATCH 28/85] fix bug in strict strand filter that was ignoring the minReadsOnEachStrand parameter --- .../tools/walkers/mutect/filtering/StrictStrandBiasFilter.java | 3 +-- .../tools/walkers/mutect/Mutect2IntegrationTest.java | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java index 8622f829e6e..05d110860bf 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java @@ -2,7 +2,6 @@ import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; @@ -24,7 +23,7 @@ public StrictStrandBiasFilter(final int minReadsOnEachStrand) { @Override public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { List> sbs = StrandBiasUtils.getSBsForAlleles(vc); - if (sbs == null || sbs.isEmpty() || sbs.size() <= 1) { + if (minReadsOnEachStrand == 0 || sbs == null || sbs.isEmpty() || sbs.size() <= 1) { return Collections.emptyList(); } 
// skip the reference diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index c38a8038b42..79975badcea 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -559,6 +559,7 @@ public void testFilterMitochondria(File unfiltered, final double minAlleleFracti args -> args.add(M2FiltersArgumentCollection.MIN_AF_LONG_NAME, minAlleleFraction), args -> args.add(M2FiltersArgumentCollection.MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, autosomalCoverage), args -> args.add(M2FiltersArgumentCollection.MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME, 4.0), + args -> args.add(M2FiltersArgumentCollection.MIN_READS_ON_EACH_STRAND_LONG_NAME, 1), args -> { intervals.stream().map(SimpleInterval::new).forEach(args::addInterval); return args; From 6f19cc3f3db70e39b3455a0df1cc42393aa243fc Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 21 Jan 2020 13:27:59 -0500 Subject: [PATCH 29/85] minor changes --- .../tools/walkers/mutect/filtering/BaseQualityFilter.java | 7 ------- .../walkers/mutect/filtering/ContaminationFilter.java | 2 -- .../walkers/mutect/filtering/FilteredHaplotypeFilter.java | 2 +- .../walkers/mutect/filtering/MinAlleleFractionFilter.java | 5 +---- .../walkers/mutect/filtering/Mutect2FilteringEngine.java | 4 ++-- .../tools/walkers/mutect/Mutect2IntegrationTest.java | 3 --- 6 files changed, 4 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java index 1a58595952a..64e5a780cf9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java @@ -1,18 +1,11 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils; -import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; -import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashMap; import java.util.List; -import java.util.function.Predicate; import java.util.stream.Collectors; public class BaseQualityFilter extends HardAlleleFilter { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java index 84d2dc7c761..b6437eb2021 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java @@ -2,8 +2,6 @@ import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.vcf.VCFConstants; -import org.apache.commons.lang.math.IntRange; import org.apache.commons.lang3.tuple.ImmutablePair; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.contamination.ContaminationRecord; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java index 8baaa859fc5..2f912799441 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java @@ -58,7 +58,7 @@ protected void accumulateDataForLearning(final VariantContext vc, final ErrorPro final double artifactProbability = errorProbabilities.getProbabilitiesByFilter().entrySet().stream() .filter(e -> e.getKey().errorType() != ErrorType.SEQUENCING) .filter(e -> !e.getKey().filterName().equals(filterName())) - .flatMap(e -> e.getValue().stream()) + .flatMap(e -> e.getValue().stream()) // the value is a list of double, we need the max of all the lists .max(Double::compareTo).orElse(0.0); for (final Genotype tumorGenotype : vc.getGenotypes()) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java index f34ed3a2d91..c897b705c5b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java @@ -3,7 +3,6 @@ import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; -import org.apache.commons.lang.mutable.MutableBoolean; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; @@ -12,8 +11,6 @@ import java.util.*; import java.util.function.Predicate; import java.util.stream.Collectors; -import java.util.stream.DoubleStream; -import java.util.stream.IntStream; public class MinAlleleFractionFilter extends HardAlleleFilter { private final double minAf; @@ -38,7 +35,7 @@ public List 
getAltData(Genotype g) { public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { LinkedHashMap> dataByAllele = getAltDataByAllele(vc, checkPreconditions(), this::getAltData, filteringEngine); return dataByAllele.entrySet().stream() - .filter(entry -> /*!entry.getKey().isSymbolic() &&*/ !vc.getReference().equals(entry.getKey())) + .filter(entry -> !vc.getReference().equals(entry.getKey())) .map(entry -> entry.getValue().stream().max(Double::compare).orElse(1.0) < minAf).collect(Collectors.toList()); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 1b2178e032b..4f2cfd5d1cf 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -278,7 +278,7 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { filters.add(new BaseQualityFilter(MTFAC.minMedianBaseQuality)); filters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); - filters.add(new StrandArtifactFilter()); // debug + filters.add(new StrandArtifactFilter()); // test gvcf filters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); // test gvcf filters.add(new ReadPositionFilter(MTFAC.minMedianReadPosition)); @@ -304,7 +304,7 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { } if (MTFAC.mitochondria) { - filters.add(new ChimericOriginalAlignmentFilter(MTFAC.maxNuMTFraction)); // convert!! 
+ filters.add(new ChimericOriginalAlignmentFilter(MTFAC.maxNuMTFraction)); // TODO convert!! filters.add(new NuMTFilter(MTFAC.medianAutosomalCoverage, MTFAC.maxNuMTAutosomalCopies)); } else { filters.add(new ClusteredEventsFilter(MTFAC.maxEventsInRegion)); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 79975badcea..d213eba380f 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -14,7 +14,6 @@ import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection; import org.broadinstitute.hellbender.engine.FeatureDataSource; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; -import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReadThreadingAssemblerArgumentCollection; @@ -76,8 +75,6 @@ public class Mutect2IntegrationTest extends CommandLineProgramTest { private static final File NA12878_MITO_BAM = new File(toolsTestDir, "mutect/mito/NA12878.bam"); private static final File NA12878_MITO_VCF = new File(toolsTestDir, "mutect/mito/unfiltered-with-assb.vcf"); -// private static final File NA12878_MITO_VCF = new File(toolsTestDir, "mutect/mito/unfiltered.vcf"); -// private static final File NA12878_MITO_GVCF = new File(toolsTestDir, "mutect/mito/unfiltered-assb.g.vcf"); private static final File NA12878_MITO_GVCF = new File(toolsTestDir, "mitochondria/NA12878.MT.g.vcf"); private static final File MITO_REF = new File(toolsTestDir, 
"mutect/mito/Homo_sapiens_assembly38.mt_only.fasta"); private static final File DEEP_MITO_BAM = new File(largeFileTestDir, "mutect/highDPMTsnippet.bam"); From d9318c08fc0acc63ff8774dac65f30e2bb0e4952 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 21 Jan 2020 15:19:20 -0500 Subject: [PATCH 30/85] remove generic from Mutect2AlleleFilter and make generic methods static. convert chimera filter to allele specific --- .../mutect/filtering/BaseQualityFilter.java | 2 +- .../ChimericOriginalAlignmentFilter.java | 29 ++++++++++++++----- .../mutect/filtering/ContaminationFilter.java | 2 +- .../filtering/DuplicatedAltReadFilter.java | 2 +- .../mutect/filtering/HardAlleleFilter.java | 2 +- ....java => MTLowHeteroplasmyFilterTool.java} | 2 +- .../filtering/MappingQualityFilter.java | 3 +- .../filtering/MinAlleleFractionFilter.java | 2 +- .../mutect/filtering/Mutect2AlleleFilter.java | 18 +++++------- .../walkers/mutect/filtering/NuMTFilter.java | 5 ++-- .../mutect/filtering/ReadPositionFilter.java | 2 +- .../filtering/StrandArtifactFilter.java | 2 +- .../filtering/StrictStrandBiasFilter.java | 2 +- .../mutect/filtering/TumorEvidenceFilter.java | 2 +- .../mutect/Mutect2IntegrationTest.java | 4 +-- 15 files changed, 44 insertions(+), 35 deletions(-) rename src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/{MTLowHeteroplasmyFilter.java => MTLowHeteroplasmyFilterTool.java} (98%) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java index 64e5a780cf9..147105a5dcc 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java @@ -8,7 +8,7 @@ import java.util.List; import java.util.stream.Collectors; -public class BaseQualityFilter extends 
HardAlleleFilter { +public class BaseQualityFilter extends HardAlleleFilter { private final double minMedianBaseQuality; public BaseQualityFilter(final double minMedianBaseQuality) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java index e8449d88c76..f5503a3f1c1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java @@ -1,12 +1,16 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; -import java.util.Collections; -import java.util.List; +import java.util.*; +import java.util.function.Predicate; +import java.util.stream.Collectors; -public class ChimericOriginalAlignmentFilter extends HardFilter { +public class ChimericOriginalAlignmentFilter extends HardAlleleFilter { private final double maxNuMTFraction; public ChimericOriginalAlignmentFilter(final double maxNuMTFraction) { @@ -16,15 +20,24 @@ public ChimericOriginalAlignmentFilter(final double maxNuMTFraction) { @Override public ErrorType errorType() { return ErrorType.ARTIFACT; } + public Predicate checkPreconditions() { + return Genotype::hasAD; + } + + public List getData(Genotype g) { + return Arrays.stream(g.getAD()).boxed().collect(Collectors.toList()); + } + @Override - public boolean isArtifact(final VariantContext vc, final Mutect2FilteringEngine filteringEngine) { + public List areAllelesArtifacts(final VariantContext vc, final 
Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { if(!vc.isBiallelic()) { - return false; + return Collections.emptyList(); } - - final int altCount = vc.getGenotypes().stream().mapToInt(g -> g.getAD()[1]).sum(); final int nonMitochondrialOriginalAlignmentCount = vc.getAttributeAsInt(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, 0); - return (double) nonMitochondrialOriginalAlignmentCount / altCount > maxNuMTFraction; + LinkedHashMap> dataByAllele = getDataByAllele(vc, checkPreconditions(), this::getData, filteringEngine); + return dataByAllele.entrySet().stream() + .filter(entry -> !vc.getReference().equals(entry.getKey())) + .map(entry -> (double) nonMitochondrialOriginalAlignmentCount / entry.getValue().stream().mapToInt(Integer::intValue).sum() > maxNuMTFraction).collect(Collectors.toList()); } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java index b6437eb2021..5e2c0868781 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java @@ -15,7 +15,7 @@ import java.util.*; import java.util.stream.Collectors; -public class ContaminationFilter extends Mutect2AlleleFilter { +public class ContaminationFilter extends Mutect2AlleleFilter { private final Map contaminationBySample; private final double defaultContamination; private final double EPSILON = 1.0e-10; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java index 935fba94182..63560854367 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java @@ -13,7 +13,7 @@ // This filter checks for the case in which PCR-duplicates with unique UMIs (which we assume is caused by false adapter priming) // amplify the erroneous signal for an alternate allele. -public class DuplicatedAltReadFilter extends HardAlleleFilter { +public class DuplicatedAltReadFilter extends HardAlleleFilter { private final int uniqueAltReadCount; public DuplicatedAltReadFilter(final int uniqueAltReadCount) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java index 8e39f616f20..9178aa2ee15 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java @@ -7,7 +7,7 @@ import java.util.*; import java.util.stream.Collectors; -public abstract class HardAlleleFilter extends Mutect2AlleleFilter { +public abstract class HardAlleleFilter extends Mutect2AlleleFilter { @Override public List calculateErrorProbabilityForAlleles(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { List alleleArtifacts = areAllelesArtifacts(vc, filteringEngine, referenceContext); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java similarity index 98% rename from src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilter.java rename to 
src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java index b8b28239742..20de0fa057e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java @@ -26,7 +26,7 @@ oneLineSummary = "If too many low het sites, filter all low het sites", programGroup = VariantFilteringProgramGroup.class ) -public class MTLowHeteroplasmyFilter extends TwoPassVariantWalker { +public class MTLowHeteroplasmyFilterTool extends TwoPassVariantWalker { public static final String MIN_LOW_HET_SITES_LONG_NAME = "min-low-het-sites"; public static final String LOW_HET_THRESHOLD_LONG_NAME = "low-het-threshold"; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java index 3074ab14b0a..469fa6677fa 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java @@ -9,8 +9,7 @@ import java.util.List; import java.util.stream.Collectors; -// TODO this class doens't use the generic method - how to simplify? 
-public class MappingQualityFilter extends HardAlleleFilter { +public class MappingQualityFilter extends HardAlleleFilter { private final double minMedianMappingQuality; private final int longIndelSize; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java index c897b705c5b..30c2495a88e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java @@ -12,7 +12,7 @@ import java.util.function.Predicate; import java.util.stream.Collectors; -public class MinAlleleFractionFilter extends HardAlleleFilter { +public class MinAlleleFractionFilter extends HardAlleleFilter { private final double minAf; public MinAlleleFractionFilter(final double minAf) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index 24a7f438b7b..c0e0d431988 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -4,29 +4,27 @@ import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.utils.IndexRange; import java.util.*; import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Collectors; -public abstract class Mutect2AlleleFilter extends Mutect2Filter { +public abstract class Mutect2AlleleFilter extends Mutect2Filter { - - public LinkedHashMap> getDataByAllele(final 
VariantContext vc, Predicate preconditions, Function> getData, final Mutect2FilteringEngine filteringEngine) { + public static LinkedHashMap> getDataByAllele(final VariantContext vc, Predicate preconditions, Function> getData, final Mutect2FilteringEngine filteringEngine) { // create and initialize a map with all the alleles in the vc as keys and new, empty lists as values LinkedHashMap> dataByAllele = vc.getAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> new ArrayList<>(), (a, b) -> a, () -> new LinkedHashMap<>())); return combineDataByAllele(dataByAllele, vc, preconditions, getData, filteringEngine); } - public LinkedHashMap> getAltDataByAllele(final VariantContext vc, Predicate preconditions, Function> getAltData, final Mutect2FilteringEngine filteringEngine) { + public static LinkedHashMap> getAltDataByAllele(final VariantContext vc, Predicate preconditions, Function> getAltData, final Mutect2FilteringEngine filteringEngine) { // create and initialize a map with all the alleles in the vc as keys and new, empty lists as values LinkedHashMap> dataByAllele = vc.getAlternateAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> new ArrayList<>(), (a, b) -> a, () -> new LinkedHashMap<>())); return combineDataByAllele(dataByAllele, vc, preconditions, getAltData, filteringEngine); } - private LinkedHashMap> combineDataByAllele(final LinkedHashMap> dataByAllele, final VariantContext vc, Predicate preconditions, Function> getData, final Mutect2FilteringEngine filteringEngine) { + private static LinkedHashMap> combineDataByAllele(final LinkedHashMap> dataByAllele, final VariantContext vc, Predicate preconditions, Function> getData, final Mutect2FilteringEngine filteringEngine) { // pull all the allele specific data out of each genotype (that passes preconditions) and add it to the list for the allele vc.getGenotypes().stream().filter(preconditions).filter(filteringEngine::isTumor) @@ -41,19 +39,19 @@ private LinkedHashMap> 
combineDataByAllele(final LinkedHashMap errorProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { return requiredAnnotations().stream().allMatch(vc::hasAttribute) ? - calculateErrorProbabilityForAlleles(vc, filteringEngine, referenceContext) : - // TODO make sure that somewhere the roundFinitePrecisionErrors is called when not a hard filter -// .entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, entry -> Mutect2FilteringEngine.roundFinitePrecisionErrors(entry.getValue()))) : + calculateErrorProbabilityForAlleles(vc, filteringEngine, referenceContext) + .stream().map(prob -> Mutect2FilteringEngine.roundFinitePrecisionErrors(prob)).collect(Collectors.toList()) : Collections.emptyList(); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java index 9c6e199ad46..9f6cc451229 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java @@ -3,7 +3,6 @@ import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; -import org.apache.commons.collections.functors.AndPredicate; import org.apache.commons.math3.distribution.PoissonDistribution; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; @@ -13,7 +12,7 @@ import java.util.stream.Collectors; -public class NuMTFilter extends HardAlleleFilter { +public class NuMTFilter extends HardAlleleFilter { private static final double LOWER_BOUND_PROB = .01; private final int maxAltDepthCutoff; @@ -41,7 +40,7 @@ public List getData(Genotype g) { public List areAllelesArtifacts(final VariantContext vc, final 
Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { LinkedHashMap> dataByAllele = getDataByAllele(vc, checkPreconditions(), this::getData, filteringEngine); return dataByAllele.entrySet().stream() - .filter(entry -> /*!entry.getKey().isSymbolic() &&*/ !vc.getReference().equals(entry.getKey())) + .filter(entry -> !vc.getReference().equals(entry.getKey())) .map(entry -> entry.getValue().stream().max(Integer::compare).orElse(0) < maxAltDepthCutoff).collect(Collectors.toList()); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java index ba229619baa..0cca5690f1d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java @@ -8,7 +8,7 @@ import java.util.List; import java.util.stream.Collectors; -public class ReadPositionFilter extends HardAlleleFilter { +public class ReadPositionFilter extends HardAlleleFilter { private final double minMedianReadPosition; public ReadPositionFilter(final double minMedianReadPosition) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java index 149232c2886..666b93343c1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java @@ -14,7 +14,7 @@ import java.util.function.DoubleUnaryOperator; import java.util.stream.Collectors; -public class StrandArtifactFilter extends Mutect2AlleleFilter { +public class StrandArtifactFilter extends Mutect2AlleleFilter { // beta prior on strand bias 
allele fraction private double INITIAL_ALPHA_STRAND = 1.0; private double INITIAL_BETA_STRAND = 20.0; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java index 05d110860bf..653e3ca1fed 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java @@ -10,7 +10,7 @@ import java.util.List; import java.util.stream.Collectors; -public class StrictStrandBiasFilter extends HardAlleleFilter> { +public class StrictStrandBiasFilter extends HardAlleleFilter { private final int minReadsOnEachStrand; public StrictStrandBiasFilter(final int minReadsOnEachStrand) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java index faebb1c72ac..6b850f4663c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java @@ -10,7 +10,7 @@ import java.util.*; -public class TumorEvidenceFilter extends Mutect2AlleleFilter { +public class TumorEvidenceFilter extends Mutect2AlleleFilter { @Override public ErrorType errorType() { return ErrorType.SEQUENCING; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index d213eba380f..01d170b12c0 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -520,8 +520,8 @@ public Object[][] vcfsForFiltering() { Collections.emptySet(), Collections.emptySet()), Arrays.asList( - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, /*GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " +*/ GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strand_bias, strict_stand - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strict_stand + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), // .|numt_chimera Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, strand_bias, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS From ec8f63cc274899e5d1322000dfd19cbb4ecf6109 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 21 Jan 2020 15:52:14 -0500 Subject: [PATCH 31/85] minor changes and comments --- .../mutect/filtering/DuplicatedAltReadFilter.java | 3 --- .../walkers/mutect/filtering/ErrorProbabilities.java | 10 ++++++++++ 
2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java index 63560854367..bfa66ba43db 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java @@ -3,13 +3,10 @@ import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.annotator.UniqueAltReadCount; -import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; -import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.stream.Collectors; // This filter checks for the case in which PCR-duplicates with unique UMIs (which we assume is caused by false adapter priming) // amplify the erroneous signal for an alternate allele. 
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index 14e7af3b45c..d4876c5fbc1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -36,8 +36,10 @@ public ErrorProbabilities(final List filters, final VariantContex } LinkedHashMap>> probabilitiesByAllelesForEachFilter = alleleProbabilitiesByFilter.entrySet().stream().collect( groupingBy(entry -> entry.getKey().errorType(), LinkedHashMap::new, mapping(entry -> entry.getValue(), toList()))); + // convert the data so we have a list of probabilities by allele instead of filter probabilitiesByAllelesForEachFilter.replaceAll((k, v) -> ErrorProbabilities.transpose(v)); + // foreach error type, get the max probability for each allele probabilitiesByTypeAndAllele = probabilitiesByAllelesForEachFilter.entrySet().stream().collect(toMap( Map.Entry::getKey, entry -> entry.getValue().stream().map(alleleList -> alleleList.stream().max(Double::compare).orElse(0.0)).collect(Collectors.toList()), @@ -45,6 +47,8 @@ public ErrorProbabilities(final List filters, final VariantContex // treat errors of different types as independent + // transpose the lists of allele probabilities, so it is now a list per allele that contains the prob for each type + // combine allele-wise combinedErrorProbabilitiesByAllele = transpose(probabilitiesByTypeAndAllele.values().stream().collect(toList())) .stream().map( alleleProbabilities -> alleleProbabilities.stream().map(p -> 1.0 - p).reduce(1.0, (a, b) -> a * b)).collect(Collectors.toList()); @@ -56,6 +60,8 @@ public ErrorProbabilities(final List filters, final VariantContex public List getNonSomaticProbabilities() { return probabilitiesByTypeAndAllele.get(ErrorType.NON_SOMATIC); 
} public Map> getProbabilitiesByFilter() { return alleleProbabilitiesByFilter; } + // helper functions for the few operations that still differ depending on whether the filter + // is per variant or allele public Map> getProbabilitiesForAlleleFilters() { return getPartitionedProbabilitiesByFilter(false); } @@ -76,7 +82,11 @@ private Map> getPartitionedProbabilitiesByFilter(boo // TODO would this be useful in a util class somewhere? private static List> transpose(List> list) { + // all lists need to be the same size final int N = list.stream().mapToInt(l -> l.size()).max().orElse(-1); + if (list.stream().anyMatch(l -> l.size() != N)) { + + } List> iterList = list.stream().map(it->it.iterator()).collect(toList()); return IntStream.range(0, N) .mapToObj(n -> iterList.stream() From b5e207a8a30fd7f78c31c3e06be9a9c4a6f01729 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 23 Jan 2020 15:31:15 -0500 Subject: [PATCH 32/85] update from PR feedback --- .../Haplochecker/Dockerfile | 5 +- .../allelespecific/StrandBiasUtils.java | 37 +++------ .../clustering/SomaticClusteringModel.java | 36 +++++--- .../mutect/filtering/BaseQualityFilter.java | 5 +- .../ChimericOriginalAlignmentFilter.java | 10 +-- .../mutect/filtering/ContaminationFilter.java | 6 +- .../mutect/filtering/ErrorProbabilities.java | 47 +++++++---- .../filtering/MinAlleleFractionFilter.java | 12 +-- .../filtering/Mutect2FilteringEngine.java | 82 +++++++++++-------- .../filtering/Mutect2VariantFilter.java | 6 +- .../walkers/mutect/filtering/NuMTFilter.java | 7 +- .../filtering/StrandArtifactFilter.java | 30 +++++-- .../mutect/filtering/TumorEvidenceFilter.java | 11 ++- .../utils/variant/GATKVCFConstants.java | 5 +- .../utils/variant/GATKVCFHeaderLines.java | 1 + .../mutect/Mutect2IntegrationTest.java | 7 +- ...a => MTLowHeteroplasmyFilterToolTest.java} | 6 +- .../mito/expected_LowHetNone_output.txt | 2 +- ...HetVariantWalkerIntegrationTest_output.txt | 14 ++-- .../mutect/mito/unfiltered-with-assb.vcf | 1 + 
.../tools/mutect/mito/unfiltered.vcf | 1 + 21 files changed, 180 insertions(+), 151 deletions(-) rename src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/{MTLowHeteroplasmyFilterTest.java => MTLowHeteroplasmyFilterToolTest.java} (90%) diff --git a/scripts/mitochondria_m2_wdl/Haplochecker/Dockerfile b/scripts/mitochondria_m2_wdl/Haplochecker/Dockerfile index b2e265409e3..89242ee83f7 100644 --- a/scripts/mitochondria_m2_wdl/Haplochecker/Dockerfile +++ b/scripts/mitochondria_m2_wdl/Haplochecker/Dockerfile @@ -9,5 +9,6 @@ RUN apt-get update && \ WORKDIR /usr/mtdnaserver # Download mitolib jar -RUN wget https://github.com/haansi/mitolib/releases/download/0.1.2/mitolib-0.1.2.jar && \ - mv mitolib-0.1.2.jar mitolib.jar +#RUN wget https://github.com/haansi/mitolib/releases/download/0.1.2/mitolib-0.1.2.jar && \ +RUN wget https://github.com/leklab/haplocheckCLI/blob/master/haplocheckCLI.jar && \ + mv haplocheckCLI.jar mitolib.jar diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java index 1e6a8afaf06..c3c78d5f0cc 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java @@ -15,7 +15,6 @@ public class StrandBiasUtils { public static final int FORWARD = 0; public static final int REVERSE = 1; public static final int MIN_COUNT = 2; - public static final String PRINT_DELIM = "|"; private static final List ZERO_LIST = new ArrayList<>(Arrays.asList(0,0)); public static Map computeSBAnnotation(VariantContext vc, AlleleLikelihoods likelihoods, String key) { @@ -31,29 +30,16 @@ public static Map computeSBAnnotation(VariantContext vc, AlleleL } protected static String makeRawAnnotationString(final List vcAlleles, final Map> 
perAlleleValues) { - String annotationString = ""; - for (final Allele a : vcAlleles) { - if (!annotationString.isEmpty()) { - annotationString += PRINT_DELIM; - } - List alleleValues = perAlleleValues.get(a); - if (alleleValues == null) { - alleleValues = ZERO_LIST; - } - annotationString += encode(alleleValues); - } - return annotationString; + final List alleleStrings = vcAlleles.stream() + .map(a -> perAlleleValues.getOrDefault(a, ZERO_LIST)) + .map(StrandBiasUtils::encode) + .collect(Collectors.toList()); + return String.join(AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM, alleleStrings); + } protected static String encode(List alleleValues) { - String annotationString = ""; - for (int j =0; j < alleleValues.size(); j++) { - annotationString += alleleValues.get(j); - if (j < alleleValues.size()-1) { - annotationString += ","; - } - } - return annotationString; + return String.join(",", alleleValues.stream().map(i -> i.toString()).collect(Collectors.toList())); } @@ -121,12 +107,9 @@ private static void updateTable(final Allele bestAllele, final GATKRead read, fi alleleStrandCounts.add(0,0); alleleStrandCounts.add(1,0); } - final boolean isForward = !read.isReverseStrand(); - if (isForward) { - alleleStrandCounts.set(FORWARD, alleleStrandCounts.get(FORWARD) + 1); - } else { - alleleStrandCounts.set(REVERSE, alleleStrandCounts.get(REVERSE) + 1); - } + + final int strand = read.isReverseStrand() ? 
REVERSE : FORWARD; + alleleStrandCounts.set(strand, alleleStrandCounts.get(strand) + 1); perAlleleValues.putAttribute(bestAllele, alleleStrandCounts); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java index a5697d7068f..ec016642820 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.clustering; +import htsjdk.variant.variantcontext.Allele; import com.google.common.primitives.Doubles; import htsjdk.variant.variantcontext.VariantContext; import it.unimi.dsi.fastutil.ints.Int2DoubleArrayMap; @@ -88,20 +89,33 @@ public SomaticClusteringModel(final M2FiltersArgumentCollection MTFAC, final Lis logClusterWeights = new double[] {Math.log1p(INITIAL_HIGH_AF_WEIGHT), Math.log(INITIAL_HIGH_AF_WEIGHT)}; } - public void record(final int[] tumorADs, final double[] tumorLogOdds, final List artifactProbabilities, final List nonSomaticProbabilities, final VariantContext vc) { - + /** + * Adds data to the model for every alternate allele + * @param tumorADs for all alleles, summed over samples + * @param tumorLogOdds for alt alleles only + * @param artifactProbabilities by alt allele, specifically technical artifact probabilities not including sequencing error, contamination, or germline variation + * @param nonSomaticProbabilities by alt allele, probabilities that the variants are real but not somatic ie germline or contamination + * @param vc + */ + public void record(int[] tumorADs, final double[] tumorLogOdds, final List artifactProbabilities, final List nonSomaticProbabilities, final VariantContext vc) { + // set tumorAD to 0 for symbolic alleles so it won't 
contribute to overall AD + List symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList()); + // convert allele index to alt allele index + List symIndexes = vc.getAlleleIndices(symbolicAlleles).stream().map(i -> i-1).collect(Collectors.toList()); + symIndexes.forEach(i -> tumorADs[i] = 0); final int totalAD = (int) MathUtils.sum(tumorADs); - // split into one-vs-all biallelics for clustering + + // split into separate alt alleles for clustering for (int i = 0; i < tumorLogOdds.length; i++) { - // things that are definitely not somatic don't need to go in the somatic clustering model - if (artifactProbabilities.get(i) > OBVIOUS_ARTIFACT_PROBABILITY_THRESHOLD) { - obviousArtifactCount.increment(); - continue; - } else if (nonSomaticProbabilities.get(i) > OBVIOUS_ARTIFACT_PROBABILITY_THRESHOLD) { - continue; + if (!vc.getAlternateAllele(i).isSymbolic()) { + if (artifactProbabilities.get(i) > OBVIOUS_ARTIFACT_PROBABILITY_THRESHOLD) { + obviousArtifactCount.increment(); + continue; + } else if (nonSomaticProbabilities.get(i) > OBVIOUS_ARTIFACT_PROBABILITY_THRESHOLD) { + continue; + } + data.add(new Datum(tumorLogOdds[i], artifactProbabilities.get(i), nonSomaticProbabilities.get(i), tumorADs[i + 1], totalAD, indelLength(vc, i))); } - - data.add(new Datum(tumorLogOdds[i], artifactProbabilities.get(i), nonSomaticProbabilities.get(i), tumorADs[i+1], totalAD, indelLength(vc, i))); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java index 147105a5dcc..4424b445e29 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java @@ -20,9 +20,8 @@ public BaseQualityFilter(final double minMedianBaseQuality) { 
@Override public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - List baseQualityByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_BASE_QUALITY_KEY, 0); - baseQualityByAllele.remove(0); // get rid of ref - return baseQualityByAllele.stream().map(qual -> qual < minMedianBaseQuality).collect(Collectors.toList()); + return vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_BASE_QUALITY_KEY, 0).stream().skip(1) // skip ref + .map(qual -> qual < minMedianBaseQuality).collect(Collectors.toList()); } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java index f5503a3f1c1..b32ca32fee2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java @@ -7,7 +7,6 @@ import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import java.util.*; -import java.util.function.Predicate; import java.util.stream.Collectors; public class ChimericOriginalAlignmentFilter extends HardAlleleFilter { @@ -20,21 +19,14 @@ public ChimericOriginalAlignmentFilter(final double maxNuMTFraction) { @Override public ErrorType errorType() { return ErrorType.ARTIFACT; } - public Predicate checkPreconditions() { - return Genotype::hasAD; - } - public List getData(Genotype g) { return Arrays.stream(g.getAD()).boxed().collect(Collectors.toList()); } @Override public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - if(!vc.isBiallelic()) { - return Collections.emptyList(); - } final int nonMitochondrialOriginalAlignmentCount = 
vc.getAttributeAsInt(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, 0); - LinkedHashMap> dataByAllele = getDataByAllele(vc, checkPreconditions(), this::getData, filteringEngine); + LinkedHashMap> dataByAllele = getDataByAllele(vc, Genotype::hasAD, this::getData, filteringEngine); return dataByAllele.entrySet().stream() .filter(entry -> !vc.getReference().equals(entry.getKey())) .map(entry -> (double) nonMitochondrialOriginalAlignmentCount / entry.getValue().stream().mapToInt(Integer::intValue).sum() > maxNuMTFraction).collect(Collectors.toList()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java index 5e2c0868781..9b1ebc6d44a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java @@ -33,7 +33,7 @@ public ContaminationFilter(final List contaminationTables, final double co @Override public List calculateErrorProbabilityForAlleles(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - // for every alt allele, a list of the depth and posterior pair + // for every alt allele, a list of the depth and posterior pairs of each sample final List>> depthsAndPosteriorsPerAllele = new ArrayList<>(); new IndexRange(0, vc.getNAlleles()-1).forEach(i -> depthsAndPosteriorsPerAllele.add(new ArrayList<>())); @@ -44,9 +44,9 @@ public List calculateErrorProbabilityForAlleles(final VariantContext vc, final double contaminationFromFile = contaminationBySample.getOrDefault(tumorGenotype.getSampleName(), defaultContamination); final double contamination = Math.max(0, Math.min(contaminationFromFile, 1 - EPSILON)); // handle file with contamination == 1 - final int[] ADs = tumorGenotype.getAD(); // AD is all 
alleles, while AF is alts only, hence the +1 offset + final int[] ADs = tumorGenotype.getAD(); final int totalAD = (int) MathUtils.sum(ADs); - final int[] altADs = Arrays.copyOfRange(ADs, 1, ADs.length); + final int[] altADs = Arrays.copyOfRange(ADs, 1, ADs.length); // get ADs of alts only // POPAF has only alt allele data final double[] negativeLog10AlleleFrequencies = VariantContextGetters.getAttributeAsDoubleArray(vc, GATKVCFConstants.POPULATION_AF_KEY, () -> new double[]{Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY}, Double.POSITIVE_INFINITY); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index d4876c5fbc1..67356e4f472 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -1,7 +1,10 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; +import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.utils.IndexRange; +import org.broadinstitute.hellbender.utils.Utils; import java.util.*; import java.util.function.Function; @@ -28,12 +31,15 @@ public ErrorProbabilities(final List filters, final VariantContex .entrySet().stream().filter(entry -> !entry.getValue().isEmpty()) .collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (a, b) -> a, LinkedHashMap::new)); - // if vc has symbolic allele, remove it + // if vc has symbolic alleles, remove them from each filter list if (vc.hasSymbolicAlleles()) { - // can we assume it's the last allele? 
- int symIndex = numAltAlleles - 1; - alleleProbabilitiesByFilter.values().stream().forEach(probList -> probList.remove(symIndex)); + List symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList()); + // convert allele index to alt allele index + List symAltIndexes = vc.getAlleleIndices(symbolicAlleles).stream().map(i -> i-1).collect(Collectors.toList()); + + alleleProbabilitiesByFilter.replaceAll((k, v) -> removeItemsByIndex(v, symAltIndexes)); } + LinkedHashMap>> probabilitiesByAllelesForEachFilter = alleleProbabilitiesByFilter.entrySet().stream().collect( groupingBy(entry -> entry.getKey().errorType(), LinkedHashMap::new, mapping(entry -> entry.getValue(), toList()))); // convert the data so we have a list of probabilities by allele instead of filter @@ -55,6 +61,16 @@ public ErrorProbabilities(final List filters, final VariantContex combinedErrorProbabilitiesByAllele.replaceAll(trueProb -> Mutect2FilteringEngine.roundFinitePrecisionErrors(1.0 - trueProb)); } + private List removeItemsByIndex(List probs, List indexesToRemove) { + List updated = new ArrayList<>(); + new IndexRange(0, probs.size()).forEach(i -> { + if (!indexesToRemove.contains(new Integer(i))) { + updated.add(probs.get(i)); + } + }); + return updated; + } + public List getCombinedErrorProbabilities() { return combinedErrorProbabilitiesByAllele; } public List getTechnicalArtifactProbabilities() { return probabilitiesByTypeAndAllele.get(ErrorType.ARTIFACT); } public List getNonSomaticProbabilities() { return probabilitiesByTypeAndAllele.get(ErrorType.NON_SOMATIC); } @@ -62,17 +78,17 @@ public ErrorProbabilities(final List filters, final VariantContex // helper functions for the few operations that still differ depending on whether the filter // is per variant or allele - public Map> getProbabilitiesForAlleleFilters() { + public LinkedHashMap> getProbabilitiesForAlleleFilters() { return getPartitionedProbabilitiesByFilter(false); } - public 
Map getProbabilitiesForVariantFilters() { + public LinkedHashMap getProbabilitiesForVariantFilters() { return getPartitionedProbabilitiesByFilter(true).entrySet().stream() .filter(entry -> entry.getValue() != null && !entry.getValue().isEmpty()) - .collect(toMap(entry -> entry.getKey(), entry -> entry.getValue().get(0))); + .collect(toMap(entry -> entry.getKey(), entry -> entry.getValue().get(0), (a,b) -> b, LinkedHashMap::new)); } - private Map> getPartitionedProbabilitiesByFilter(boolean variantOnly) { + private LinkedHashMap> getPartitionedProbabilitiesByFilter(boolean variantOnly) { Map>> groups = alleleProbabilitiesByFilter.entrySet().stream().collect(Collectors.partitioningBy( entry -> Mutect2VariantFilter.class.isAssignableFrom(entry.getKey().getClass()), @@ -80,18 +96,15 @@ private Map> getPartitionedProbabilitiesByFilter(boo return groups.get(variantOnly); } - // TODO would this be useful in a util class somewhere? - private static List> transpose(List> list) { + public static List> transpose(List> list) { // all lists need to be the same size - final int N = list.stream().mapToInt(l -> l.size()).max().orElse(-1); - if (list.stream().anyMatch(l -> l.size() != N)) { - - } - List> iterList = list.stream().map(it->it.iterator()).collect(toList()); - return IntStream.range(0, N) + Utils.validateArg(!list.isEmpty() && list.stream().map(List::size).distinct().count() == 1, "lists are not the same size"); + List> iterList = list.stream().map(it -> it.iterator()).collect(toList()); + return IntStream.range(0, list.get(0).size()) .mapToObj(n -> iterList.stream() .filter(it -> it.hasNext()) .map(m -> m.next()) .collect(toList())) .collect(toList()); - }} + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java index 30c2495a88e..af75343be4f 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; +import com.google.common.primitives.Doubles; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; @@ -9,7 +10,6 @@ import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; import java.util.*; -import java.util.function.Predicate; import java.util.stream.Collectors; public class MinAlleleFractionFilter extends HardAlleleFilter { @@ -22,18 +22,14 @@ public MinAlleleFractionFilter(final double minAf) { @Override public ErrorType errorType() { return ErrorType.ARTIFACT; } - public Predicate checkPreconditions() { - return g -> g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY); - } - - public List getAltData(Genotype g) { + public List getAltData(final Genotype g) { double[] data = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(g, GATKVCFConstants.ALLELE_FRACTION_KEY, () -> null, 1.0); - return Arrays.stream(data).boxed().collect(Collectors.toList()); + return Doubles.asList(data); } @Override public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - LinkedHashMap> dataByAllele = getAltDataByAllele(vc, checkPreconditions(), this::getAltData, filteringEngine); + LinkedHashMap> dataByAllele = getAltDataByAllele(vc, g -> g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY), this::getAltData, filteringEngine); return dataByAllele.entrySet().stream() .filter(entry -> !vc.getReference().equals(entry.getKey())) .map(entry -> entry.getValue().stream().max(Double::compare).orElse(1.0) < minAf).collect(Collectors.toList()); diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 4f2cfd5d1cf..7f65f5886af 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -12,7 +12,6 @@ import org.apache.commons.math3.util.MathArrays; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; -import org.broadinstitute.hellbender.tools.walkers.annotator.StrandBiasTest; import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2Engine; import org.broadinstitute.hellbender.tools.walkers.mutect.MutectStats; import org.broadinstitute.hellbender.tools.walkers.mutect.clustering.SomaticClusteringModel; @@ -102,13 +101,14 @@ public int[] sumADsOverSamples(final VariantContext vc, final boolean includeTum return ADs; } - public int[] sumStrandCountsOverSamples(final VariantContext vc, final boolean includeTumor, final boolean includeNormal) { - final int[] result = new int[4]; - vc.getGenotypes().stream().filter(g -> (includeTumor && isTumor(g)) || (includeNormal && isNormal(g))) - .filter(g -> g.hasExtendedAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) - .map(g -> StrandBiasTest.getStrandCounts(g)).forEach(sbbs -> new IndexRange(0, 4).forEach(n -> result[n] += sbbs[n])); - return result; - } + // TODO verify not needed and remove +// public int[] sumStrandCountsOverSamples(final VariantContext vc, final boolean includeTumor, final boolean includeNormal) { +// final int[] result = new int[4]; +// vc.getGenotypes().stream().filter(g -> (includeTumor && isTumor(g)) || (includeNormal && isNormal(g))) +// .filter(g -> g.hasExtendedAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) +// 
.map(g -> StrandBiasTest.getStrandCounts(g)).forEach(sbbs -> new IndexRange(0, 4).forEach(n -> result[n] += sbbs[n])); +// return result; +// } public double[] weightedAverageOfTumorAFs(final VariantContext vc) { final MutableDouble totalWeight = new MutableDouble(0); @@ -185,19 +185,34 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext Map siteFiltersWithErrorProb = new LinkedHashMap<>(); // apply allele specific filters - List> ASFilters = + List> alleleStatusByFilter = errorProbabilities.getProbabilitiesForAlleleFilters().entrySet().stream() .filter(entry -> !entry.getValue().isEmpty()) - .map(entry -> addFilterStrings(entry.getValue(), siteFiltersWithErrorProb, errorThreshold, entry.getKey().filterName())).collect(Collectors.toList()); + .map(entry -> addFilterStrings(entry.getValue(), errorThreshold, entry.getKey().filterName())).collect(Collectors.toList()); + + // for each allele, merge all allele specific filters +// List> ASFiltersIterator = ASFilters.stream().map(list -> list.listIterator()).collect(Collectors.toList()); + List> filtersByAllele = ErrorProbabilities.transpose(alleleStatusByFilter); + List> distinctFiltersByAllele = filtersByAllele.stream().map(this::getDistinctFiltersForAllele).collect(Collectors.toList()); + ListIterator mergedFilterStringByAllele = distinctFiltersByAllele.stream().map(AnnotationUtils::encodeStringList).collect(Collectors.toList()).listIterator(); List orderedASFilterStrings = vc.getAlleles().stream().map(allele -> allele.isReference() || allele.isSymbolic() ? 
- VCFConstants.EMPTY_INFO_FIELD : getMergedFilterStringForAllele(ASFilters)).collect(Collectors.toList()); + VCFConstants.EMPTY_INFO_FIELD : mergedFilterStringByAllele.next()).collect(Collectors.toList()); String finalAttrString = AnnotationUtils.encodeAnyASList(orderedASFilterStrings); vcb.putAttributes(Collections.singletonMap(GATKVCFConstants.AS_FILTER_STATUS_KEY, finalAttrString)); // compute site-only filters + // from allele specific filters + alleleStatusByFilter.stream().forEachOrdered(alleleStatusForFilter -> { + if (!alleleStatusForFilter.isEmpty() && alleleStatusForFilter.stream().distinct().count() == 1 && !alleleStatusForFilter.contains(VCFConstants.PASSES_FILTERS_v4)) { + siteFiltersWithErrorProb.put(alleleStatusForFilter.get(0), 1.0); + } + }); + + + // from variant filters errorProbabilities.getProbabilitiesForVariantFilters().entrySet().stream() .forEach(entry -> { entry.getKey().phredScaledPosteriorAnnotationName().ifPresent(annotation -> { @@ -211,8 +226,15 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext }); - // TO reviewers - should there be a flag where this is skipped and all filters are in the output vcf? - // otherwise things may seem erroneous. and should we apply this type of limit on the allele specific filters too? + // if all alleles have been filtered out, but for different reasons, fail the site. 
+ // if the site is only ref and symbolic, no filters will be applied so don't fail + if (siteFiltersWithErrorProb.isEmpty() && !distinctFiltersByAllele.stream().allMatch(List::isEmpty)) { + // if any allele passed, don't fail the site + if (!distinctFiltersByAllele.stream().flatMap(List::stream).anyMatch(f -> f.equals(VCFConstants.PASSES_FILTERS_v4))) { + // we know the allele level filters exceeded their threshold - so set this prob to 1 + siteFiltersWithErrorProb.put(GATKVCFConstants.FAIL, 1.0); + } + } // this code limits the number of filters specified for any variant to the highest probability filters // this will not change the status of whether a variant is actually filtered or not @@ -227,38 +249,32 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext } /** - * Creates a comma separated string of all the filters that apply to the allele. This is basically - * a pivot of the data. we have filterlist -> allele -> filterName. and we want allele -> list of filterName - * @param alleleSpecificFilters all of the allele specific filters with the allele filter info + * Creates a comma separated string of all the filters that apply to the allele. 
+ * @param filtersForAllele all the filters applied to the allele * @return encoded (comma separated) list of filters that apply to the allele */ - private String getMergedFilterStringForAllele(List> alleleSpecificFilters) { + private List getDistinctFiltersForAllele(List filtersForAllele) { // loop through each filter and pull out the filters the specified allele - List results = alleleSpecificFilters.stream().map(alleleValuesIterator -> alleleValuesIterator.next()).distinct().collect(Collectors.toList()); + List results = filtersForAllele.stream().distinct().collect(Collectors.toList()); if (results.size() > 1 && results.contains(VCFConstants.PASSES_FILTERS_v4)) { results.remove(VCFConstants.PASSES_FILTERS_v4); } else if (results.isEmpty()) { results.add(VCFConstants.PASSES_FILTERS_v4); } - return AnnotationUtils.encodeStringList(results); + return results; } /** - * For each allele, determine whether the filter should be applied. also determine if the filter should apply to the site - * @param probabilities the probability computed by the filter for the allele - * @param siteFiltersWithErrorProb in/out parameter that is collecting site level filters with the max error probability + * For each allele, determine whether the filter should be applied and return either the + * filter name or PASS + * @param probabilities the probabilities computed by the filter for the alleles * @param errorThreshold the theshold to use to determine whether filter applies - * @param filterName the name of the filter used in the vcf - * @return Iterator of filters for an allele + * @param filterName the name of the filter being evaluated + * @return List of filtername or "PASS" for each allele */ - private Iterator addFilterStrings(List probabilities, Map siteFiltersWithErrorProb, double errorThreshold, String filterName) { - List results = probabilities.stream().map(value -> value > errorThreshold ? 
+ private List addFilterStrings(List probabilities, double errorThreshold, String filterName) { + return probabilities.stream().map(value -> value > errorThreshold ? filterName : VCFConstants.PASSES_FILTERS_v4).collect(Collectors.toList()); - if (!results.isEmpty() && results.stream().allMatch(x -> x.equals(filterName))) { - // TODO is this the correct default - siteFiltersWithErrorProb.put(filterName, probabilities.stream().max(Double::compareTo).orElse(0.0)); - } - return results.iterator(); } public static double roundFinitePrecisionErrors(final double probability) { @@ -277,7 +293,7 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { filters.add(new TumorEvidenceFilter()); filters.add(new BaseQualityFilter(MTFAC.minMedianBaseQuality)); filters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); - filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); + filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); // test filters.add(new StrandArtifactFilter()); // test gvcf filters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); // test gvcf @@ -304,7 +320,7 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { } if (MTFAC.mitochondria) { - filters.add(new ChimericOriginalAlignmentFilter(MTFAC.maxNuMTFraction)); // TODO convert!! 
+ filters.add(new ChimericOriginalAlignmentFilter(MTFAC.maxNuMTFraction)); filters.add(new NuMTFilter(MTFAC.medianAutosomalCoverage, MTFAC.maxNuMTAutosomalCopies)); } else { filters.add(new ClusteredEventsFilter(MTFAC.maxEventsInRegion)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java index 4b07a6a8559..fc2a4dec5e6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java @@ -5,6 +5,7 @@ import org.broadinstitute.hellbender.utils.IndexRange; import java.util.ArrayList; +import java.util.Collections; import java.util.List; public abstract class Mutect2VariantFilter extends Mutect2Filter { @@ -15,10 +16,7 @@ public List errorProbabilities(final VariantContext vc, final Mutect2Fil int numAltAlleles = vc.getNAlleles() - 1; final double result = Mutect2FilteringEngine.roundFinitePrecisionErrors(requiredAnnotations().stream().allMatch(vc::hasAttribute) ? 
calculateErrorProbability(vc, filteringEngine, referenceContext) : 0.0); - ArrayList resultList = new ArrayList<>(numAltAlleles); - new IndexRange(0, numAltAlleles).forEach(i -> resultList.add(result)); - return resultList; - + return Collections.nCopies(numAltAlleles, result); } protected abstract double calculateErrorProbability(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java index 9f6cc451229..b8bf6566d2c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java @@ -8,7 +8,6 @@ import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import java.util.*; -import java.util.function.Predicate; import java.util.stream.Collectors; @@ -28,17 +27,13 @@ public NuMTFilter(final double medianAutosomalCoverage, final double maxNuMTCopi @Override public ErrorType errorType() { return ErrorType.NON_SOMATIC; } - public Predicate checkPreconditions() { - return Genotype::hasAD; - } - public List getData(Genotype g) { return Arrays.stream(g.getAD()).boxed().collect(Collectors.toList()); } @Override public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - LinkedHashMap> dataByAllele = getDataByAllele(vc, checkPreconditions(), this::getData, filteringEngine); + LinkedHashMap> dataByAllele = getDataByAllele(vc, Genotype::hasAD, this::getData, filteringEngine); return dataByAllele.entrySet().stream() .filter(entry -> !vc.getReference().equals(entry.getKey())) .map(entry -> entry.getValue().stream().max(Integer::compare).orElse(0) < maxAltDepthCutoff).collect(Collectors.toList()); diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java index 666b93343c1..c8520485537 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java @@ -1,11 +1,13 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; import com.google.common.annotations.VisibleForTesting; +import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import org.apache.commons.math3.util.CombinatoricsUtils; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils; import org.broadinstitute.hellbender.tools.walkers.validation.basicshortmutpileup.BetaBinomialDistribution; +import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.OptimizationUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; @@ -13,6 +15,7 @@ import java.util.*; import java.util.function.DoubleUnaryOperator; import java.util.stream.Collectors; +import java.util.stream.IntStream; public class StrandArtifactFilter extends Mutect2AlleleFilter { // beta prior on strand bias allele fraction @@ -56,21 +59,34 @@ public List calculateArtifactProbabilities(final VariantContext vc, final if (sbs == null || sbs.isEmpty() || sbs.size() <= 1) { return Collections.emptyList(); } + // remove symbolic alleles + if (vc.hasSymbolicAlleles()) { + final List> unfilteredSbs = new ArrayList<>(sbs); + sbs.clear(); + List symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList()); + List symIndexes = 
vc.getAlleleIndices(symbolicAlleles); + new IndexRange(0, sbs.size()).forEach(i -> { + if (!symIndexes.contains(new Integer(i))) { + sbs.add(unfilteredSbs.get(i)); + } + }); + } - final ListIterator indelSizeIterator = vc.getAlternateAlleles().stream().map(alt -> Math.abs(vc.getReference().length() - alt.length())).collect(Collectors.toList()).listIterator(); - int totalFwd = sbs.stream().map(sb -> sb.get(0)).reduce(0, Math::addExact); - int totalRev = sbs.stream().map(sb -> sb.get(1)).reduce(0, Math::addExact); + final List indelSizes = vc.getAlternateAlleles().stream().map(alt -> Math.abs(vc.getReference().length() - alt.length())).collect(Collectors.toList()); + int totalFwd = sbs.stream().map(sb -> sb.get(0)).mapToInt(i -> i).sum(); + int totalRev = sbs.stream().map(sb -> sb.get(1)).mapToInt(i -> i).sum(); // skip the reference - List> altSBs = sbs.subList(1, sbs.size()); + final List> altSBs = sbs.subList(1, sbs.size()); - return altSBs.stream().map(altSB -> { - final int altIndelSize = indelSizeIterator.next(); + return IntStream.range(0, altSBs.size()).mapToObj(i -> { + final List altSB = altSBs.get(i); + final int altIndelSize = indelSizes.get(i); if (altSB.stream().mapToInt(Integer::intValue).sum() == 0 || altIndelSize > LONGEST_STRAND_ARTIFACT_INDEL_SIZE) { return new EStep(0, 0, totalFwd, totalRev, altSB.get(0), altSB.get(1)); } else { return strandArtifactProbability(strandArtifactPrior, totalFwd, totalRev, altSB.get(0), altSB.get(1), altIndelSize); } - }).collect(Collectors.toList()); + }).collect(Collectors.toList()); } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java index 6b850f4663c..7ab922e2a7e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java @@ -4,11 +4,12 @@ import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.mutect.clustering.Datum; import org.broadinstitute.hellbender.tools.walkers.mutect.clustering.SomaticClusteringModel; -import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.IntStream; public class TumorEvidenceFilter extends Mutect2AlleleFilter { @Override @@ -22,12 +23,10 @@ protected List calculateErrorProbabilityForAlleles(final VariantContext final int totalCount = (int) MathUtils.sum(ADs); SomaticClusteringModel model = filteringEngine.getSomaticClusteringModel(); - List altResults = new ArrayList<>(); - // 0 is the correct value. problem with threshold - new IndexRange(0, tumorLods.length).forEach(i -> - altResults.add(model.probabilityOfSequencingError(new Datum(tumorLods[i], 0, 0, ADs[i+1], totalCount, SomaticClusteringModel.indelLength(vc, i))))); + return IntStream.range(0, tumorLods.length).mapToObj(i -> + new Datum(tumorLods[i], 0, 0, ADs[i+1], totalCount, SomaticClusteringModel.indelLength(vc, i))) + .map(model::probabilityOfSequencingError).collect(Collectors.toList()); - return altResults; } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java index 17501221ee4..c2808b25afc 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java @@ -164,7 +164,8 @@ their names (or descriptions) depend on some threshold. 
Those filters are not i public final static String CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME = "numt_chimera"; //mitochondria public final static String ALLELE_FRACTION_FILTER_NAME = "low_allele_frac"; public static final String POSSIBLE_NUMT_FILTER_NAME = "possible_numt"; - public static final String LOW_HET_FILTER_NAME = "low_het"; + public static final String LOW_HET_FILTER_NAME = "mt_low_het"; + public static final String FAIL = "FAIL"; public static final List MUTECT_FILTER_NAMES = Arrays.asList(VCFConstants.PASSES_FILTERS_v4, POLYMERASE_SLIPPAGE, PON_FILTER_NAME, CLUSTERED_EVENTS_FILTER_NAME, TUMOR_EVIDENCE_FILTER_NAME, GERMLINE_RISK_FILTER_NAME, @@ -173,7 +174,7 @@ their names (or descriptions) depend on some threshold. Those filters are not i MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME, READ_POSITION_FILTER_NAME, CONTAMINATION_FILTER_NAME, DUPLICATED_EVIDENCE_FILTER_NAME, READ_ORIENTATION_ARTIFACT_FILTER_NAME, BAD_HAPLOTYPE_FILTER_NAME, CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME, - STRICT_STRAND_BIAS_FILTER_NAME, N_RATIO_FILTER_NAME, ALLELE_FRACTION_FILTER_NAME, POSSIBLE_NUMT_FILTER_NAME); + STRICT_STRAND_BIAS_FILTER_NAME, N_RATIO_FILTER_NAME, ALLELE_FRACTION_FILTER_NAME, POSSIBLE_NUMT_FILTER_NAME, FAIL); public static final List MUTECT_AS_FILTER_NAMES = Arrays.asList(AS_FILTER_STATUS_KEY); diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java index a27c7951a74..ca253cdf0a4 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java @@ -100,6 +100,7 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf addFilterLine(new VCFFilterHeaderLine(CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME, "NuMT variant with too many ALT reads originally from autosome")); addFilterLine(new 
VCFFilterHeaderLine(POSSIBLE_NUMT_FILTER_NAME, "Allele depth is below expected coverage of NuMT in autosome")); addFilterLine(new VCFFilterHeaderLine(LOW_HET_FILTER_NAME, "All low heteroplasmy sites are filtered when at least x low het sites pass all other filters")); + addFilterLine(new VCFFilterHeaderLine(FAIL, "Fail the site if all alleles fail but for different reasons.")); addFormatLine(new VCFFormatHeaderLine(ALLELE_BALANCE_KEY, 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); addFormatLine(new VCFFormatHeaderLine(MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY, 1, VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 01d170b12c0..b5217fcb38a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -518,14 +518,17 @@ public Object[][] vcfsForFiltering() { GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME), Collections.emptySet(), Collections.emptySet(), - Collections.emptySet()), + Collections.emptySet(), + ImmutableSet.of(GATKVCFConstants.FAIL)), Arrays.asList( Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strict_stand Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), // .|numt_chimera Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + 
GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, strand_bias, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4) // .|PASS + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME) // .|weak_evidence|low_allele_frac, possible_numt + )}, {NA12878_MITO_GVCF, .0009, 0.5, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList( Collections.emptySet(), diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java similarity index 90% rename from src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTest.java rename to src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java index 1a84a724c75..a45c86a52a0 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java @@ -8,7 +8,7 @@ import 
java.io.IOException; import java.util.Arrays; -public class MTLowHeteroplasmyFilterTest extends CommandLineProgramTest { +public class MTLowHeteroplasmyFilterToolTest extends CommandLineProgramTest { private static final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta"); private static final File NA12878_MITO_FILTERED_VCF = new File(toolsTestDir, "mutect/mito/filtered.vcf"); @@ -18,7 +18,7 @@ public void testLowHetVariantWalker() throws IOException { " -R " + MITO_REF.getAbsolutePath() + " -V " + NA12878_MITO_FILTERED_VCF + " -O %s", - Arrays.asList(toolsTestDir + "mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt") + Arrays.asList(toolsTestDir + "mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf") ); testSpec.executeTest("testLowHetVariantWalker", this); @@ -27,7 +27,7 @@ public void testLowHetVariantWalker() throws IOException { " -V " + NA12878_MITO_FILTERED_VCF + " -O %s" + " --min-low-het-sites 5", - Arrays.asList(toolsTestDir + "mutect/mito/expected_LowHetNone_output.txt") + Arrays.asList(toolsTestDir + "mutect/mito/expected_LowHetNone_output.vcf") ); testLowHetNoneSpec.executeTest("testLowHetVariantWalker", this); } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.txt b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.txt index 976f3abac55..54c682c71d5 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.txt +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.txt @@ -7,7 +7,7 @@ ##FILTER= ##FILTER= ##FILTER= -##FILTER= +##FILTER= ##FILTER= ##FILTER= ##FILTER= diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt 
b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt index b8b7328183e..f087fc0d5ac 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt @@ -7,7 +7,7 @@ ##FILTER= ##FILTER= ##FILTER= -##FILTER= +##FILTER= ##FILTER= ##FILTER= ##FILTER= @@ -61,9 +61,9 @@ ##source=FilterMutectCalls ##source=Mutect2 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chrM 152 . T C . low_het AS_FilterStatus=.|PASS;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true -chrM 263 . A G . low_het;numt_chimera AS_FilterStatus=.|PASS;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 -chrM 301 . A AC . low_allele_frac;low_het;possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 -chrM 302 . A AC,C,ACC . low_het AS_FilterStatus=.|PASS|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 -chrM 310 . T TC . low_het AS_FilterStatus=.|PASS;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 -chrM 750 . A G . 
low_het AS_FilterStatus=.|PASS;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true +chrM 152 . T C . mt_low_het AS_FilterStatus=.|PASS;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +chrM 263 . A G . mt_low_het;numt_chimera AS_FilterStatus=.|PASS;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +chrM 301 . A AC . low_allele_frac;mt_low_het;possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +chrM 302 . A AC,C,ACC . mt_low_het AS_FilterStatus=.|PASS|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 310 . T TC . mt_low_het AS_FilterStatus=.|PASS;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 750 . A G . 
mt_low_het AS_FilterStatus=.|PASS;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf index e93973ca0c7..da8e97bf1d7 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf @@ -43,3 +43,4 @@ chrM 301 . A AC . . AS_SB_TABLE=500,149|41,12;DP=680;ECNT=4;TLOD=3.32;POPAF=5.00 chrM 302 . A AC,C,ACC . . AS_SB_TABLE=0,5|51,350|60,7|49,0;DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 chrM 310 . T TC . . AS_SB_TABLE=0,0|300,358;DP=705;ECNT=4;TLOD=1974.89;POPAF=5.000e-08;RPA=5,6;RU=C;STR;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 chrM 750 . A G . . AS_SB_TABLE=0,1|100,1424;DP=1568;ECNT=1;TLOD=5097.90;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true +chrM 802 . A C,ACC . . 
AS_SB_TABLE=5,100|51,250|60,7;DP=659;ECNT=4;TLOD=10.66,891.23;POPAF=5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2:105,301,67:0.636,0.141:2,163,35:3,238,32:20,20,30:419,316,340:60,60,60:41,33 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered.vcf index 6ab08c32803..c24b90cd6b5 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered.vcf @@ -41,4 +41,5 @@ chrM 263 . A G . . DP=858;ECNT=4;TLOD=2641.72;POPAF=5.000e-08;OCM=800 GT:AD:AF:F chrM 301 . A AC . . DP=680;ECNT=4;TLOD=3.32;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 chrM 302 . A AC,C,ACC . . DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 chrM 310 . T TC . . DP=705;ECNT=4;TLOD=1974.89;POPAF=5.000e-08;RPA=5,6;RU=C;STR;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 400 . A AC,C,ACC . . DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 chrM 750 . A G . . 
DP=1568;ECNT=1;TLOD=5097.90;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true From 03754fc224e39cbfa91afd37e7bf3c3209703a64 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 24 Jan 2020 10:12:27 -0500 Subject: [PATCH 33/85] changes after rebase and to test FAIL filter --- .../tools/walkers/mutect/filtering/ErrorProbabilities.java | 5 ++++- .../walkers/mutect/filtering/MinAlleleFractionFilter.java | 3 +-- .../tools/walkers/mutect/Mutect2IntegrationTest.java | 7 ++++--- .../hellbender/tools/mutect/mito/unfiltered-with-assb.vcf | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index 67356e4f472..d081c4b21b1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -98,7 +98,10 @@ private LinkedHashMap> getPartitionedProbabilitiesBy public static List> transpose(List> list) { // all lists need to be the same size - Utils.validateArg(!list.isEmpty() && list.stream().map(List::size).distinct().count() == 1, "lists are not the same size"); + if (list.isEmpty()) { + return list; + } + Utils.validateArg(list.stream().map(List::size).distinct().count() == 1, "lists are not the same size"); List> iterList = list.stream().map(it -> it.iterator()).collect(toList()); return IntStream.range(0, list.get(0).size()) .mapToObj(n -> iterList.stream() diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java index af75343be4f..c04facde576 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java @@ -5,7 +5,6 @@ import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; @@ -23,7 +22,7 @@ public MinAlleleFractionFilter(final double minAf) { public ErrorType errorType() { return ErrorType.ARTIFACT; } public List getAltData(final Genotype g) { - double[] data = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(g, GATKVCFConstants.ALLELE_FRACTION_KEY, () -> null, 1.0); + double[] data = VariantContextGetters.getAttributeAsDoubleArray(g, GATKVCFConstants.ALLELE_FRACTION_KEY, () -> null, 1.0); return Doubles.asList(data); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index b5217fcb38a..863210228f3 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -15,6 +15,7 @@ import org.broadinstitute.hellbender.engine.FeatureDataSource; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; +import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection; import 
org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReadThreadingAssemblerArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReferenceConfidenceMode; @@ -511,7 +512,7 @@ public void testMitochondria() { public Object[][] vcfsForFiltering() { return new Object[][]{ {NA12878_MITO_VCF, 0.5, 30, Collections.emptyList(), Arrays.asList( - ImmutableSet.of(GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), + ImmutableSet.of(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), ImmutableSet.of(GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), ImmutableSet.of( GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, @@ -521,13 +522,13 @@ public Object[][] vcfsForFiltering() { Collections.emptySet(), ImmutableSet.of(GATKVCFConstants.FAIL)), Arrays.asList( - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strict_stand + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strand_bias, strict_stand Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), // .|numt_chimera Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + 
GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, strand_bias, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME) // .|weak_evidence|low_allele_frac, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME) // .|weak_evidence, strand_bias, strict_stand|low_allele_frac, possible_numt )}, {NA12878_MITO_GVCF, .0009, 0.5, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList( diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf index da8e97bf1d7..4afa7b91d99 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf @@ -43,4 +43,4 @@ chrM 301 . A AC . . AS_SB_TABLE=500,149|41,12;DP=680;ECNT=4;TLOD=3.32;POPAF=5.00 chrM 302 . A AC,C,ACC . . AS_SB_TABLE=0,5|51,350|60,7|49,0;DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 chrM 310 . T TC . . 
AS_SB_TABLE=0,0|300,358;DP=705;ECNT=4;TLOD=1974.89;POPAF=5.000e-08;RPA=5,6;RU=C;STR;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 chrM 750 . A G . . AS_SB_TABLE=0,1|100,1424;DP=1568;ECNT=1;TLOD=5097.90;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true -chrM 802 . A C,ACC . . AS_SB_TABLE=5,100|51,250|60,7;DP=659;ECNT=4;TLOD=10.66,891.23;POPAF=5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2:105,301,67:0.636,0.141:2,163,35:3,238,32:20,20,30:419,316,340:60,60,60:41,33 +chrM 802 . A C,ACC . . AS_SB_TABLE=55,50|0,301|30,37;DP=659;ECNT=4;TLOD=10.66,891.23;POPAF=5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2:105,301,67:0.636,0.141:2,163,35:3,238,32:20,20,30:419,316,340:60,60,60:41,33 From 98a9dc86e3686a3f000f62dc5d51eb5f8f54e13e Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 24 Jan 2020 13:16:26 -0500 Subject: [PATCH 34/85] fix issue with null value for SB annotation --- scripts/mitochondria_m2_wdl/Haplochecker/Dockerfile | 5 ++--- .../walkers/annotator/allelespecific/StrandBiasUtils.java | 4 +++- .../walkers/mutect/clustering/SomaticClusteringModel.java | 2 +- ..._LowHetNone_output.txt => expected_LowHetNone_output.vcf} | 0 ...> expected_LowHetVariantWalkerIntegrationTest_output.vcf} | 0 5 files changed, 6 insertions(+), 5 deletions(-) rename src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/{expected_LowHetNone_output.txt => expected_LowHetNone_output.vcf} (100%) rename src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/{expected_LowHetVariantWalkerIntegrationTest_output.txt => expected_LowHetVariantWalkerIntegrationTest_output.vcf} (100%) diff --git a/scripts/mitochondria_m2_wdl/Haplochecker/Dockerfile b/scripts/mitochondria_m2_wdl/Haplochecker/Dockerfile index 89242ee83f7..b2e265409e3 100644 --- a/scripts/mitochondria_m2_wdl/Haplochecker/Dockerfile 
+++ b/scripts/mitochondria_m2_wdl/Haplochecker/Dockerfile @@ -9,6 +9,5 @@ RUN apt-get update && \ WORKDIR /usr/mtdnaserver # Download mitolib jar -#RUN wget https://github.com/haansi/mitolib/releases/download/0.1.2/mitolib-0.1.2.jar && \ -RUN wget https://github.com/leklab/haplocheckCLI/blob/master/haplocheckCLI.jar && \ - mv haplocheckCLI.jar mitolib.jar +RUN wget https://github.com/haansi/mitolib/releases/download/0.1.2/mitolib-0.1.2.jar && \ + mv mitolib-0.1.2.jar mitolib.jar diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java index c3c78d5f0cc..d8e6a4d3e30 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java @@ -23,7 +23,8 @@ public static Map computeSBAnnotation(VariantContext vc, AlleleL final Map annotations = new HashMap<>(); final ReducibleAnnotationData> myData = new AlleleSpecificAnnotationData<>(vc.getAlleles(),null); getStrandCountsFromLikelihoodMap(vc, likelihoods, myData, MIN_COUNT); - final Map> perAlleleValues = myData.getAttributeMap(); + Map> perAlleleValues = new LinkedHashMap<>(myData.getAttributeMap()); + perAlleleValues.values().removeIf(Objects::isNull); final String annotationString = makeRawAnnotationString(vc.getAlleles(), perAlleleValues); annotations.put(key, annotationString); return annotations; @@ -31,6 +32,7 @@ public static Map computeSBAnnotation(VariantContext vc, AlleleL protected static String makeRawAnnotationString(final List vcAlleles, final Map> perAlleleValues) { final List alleleStrings = vcAlleles.stream() + // does not replace a null value with zero list - only if the key is not in the map .map(a -> perAlleleValues.getOrDefault(a, ZERO_LIST)) .map(StrandBiasUtils::encode) 
.collect(Collectors.toList()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java index ec016642820..14648677d45 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java @@ -95,7 +95,7 @@ public SomaticClusteringModel(final M2FiltersArgumentCollection MTFAC, final Lis * @param tumorLogOdds for alt alleles only * @param artifactProbabilities by alt allele, specifically technical artifact probabilities not including sequencing error, contamination, or germline variation * @param nonSomaticProbabilities by alt allele, probabilities that the variants are real but not somatic ie germline or contamination - * @param vc + * @param vc the variant context the data apply to */ public void record(int[] tumorADs, final double[] tumorLogOdds, final List artifactProbabilities, final List nonSomaticProbabilities, final VariantContext vc) { // set tumorAD to 0 for symbolic alleles so it won't contribute to overall AD diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.txt b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.vcf similarity index 100% rename from src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.txt rename to src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.vcf diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf similarity index 100% rename 
from src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.txt rename to src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf From c8bdeeeabcdea650ab7c1b8d894129282456c464 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 24 Jan 2020 14:14:37 -0500 Subject: [PATCH 35/85] fix output files for test --- .../hellbender/tools/mutect/mito/expected_LowHetNone_output.vcf | 2 +- .../mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.vcf index 54c682c71d5..9ce1fb9283f 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.vcf @@ -7,8 +7,8 @@ ##FILTER= ##FILTER= ##FILTER= -##FILTER= ##FILTER= +##FILTER= ##FILTER= ##FILTER= ##FILTER= diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf index f087fc0d5ac..e686ba4c6a0 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf @@ -7,8 +7,8 @@ ##FILTER= ##FILTER= ##FILTER= -##FILTER= ##FILTER= +##FILTER= ##FILTER= ##FILTER= ##FILTER= From fb956fbb0fa11725c7604f9d04286697c6557fc5 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 24 Jan 2020 17:03:53 -0500 Subject: 
[PATCH 36/85] remove warnings --- .../tools/walkers/mutect/filtering/ErrorProbabilities.java | 2 +- .../tools/walkers/mutect/filtering/StrandArtifactFilter.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index d081c4b21b1..1ee0521f664 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -64,7 +64,7 @@ public ErrorProbabilities(final List filters, final VariantContex private List removeItemsByIndex(List probs, List indexesToRemove) { List updated = new ArrayList<>(); new IndexRange(0, probs.size()).forEach(i -> { - if (!indexesToRemove.contains(new Integer(i))) { + if (!indexesToRemove.contains(i)) { updated.add(probs.get(i)); } }); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java index c8520485537..de6a834528a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java @@ -66,7 +66,7 @@ public List calculateArtifactProbabilities(final VariantContext vc, final List symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList()); List symIndexes = vc.getAlleleIndices(symbolicAlleles); new IndexRange(0, sbs.size()).forEach(i -> { - if (!symIndexes.contains(new Integer(i))) { + if (!symIndexes.contains(i)) { sbs.add(unfilteredSbs.get(i)); } }); From 5de3575669fbc1719a38d5b14d6200a9250bbb34 Mon Sep 17 
00:00:00 2001 From: Andrea Haessly Date: Thu, 30 Jan 2020 12:37:19 -0500 Subject: [PATCH 37/85] update splitting alleles to include analyzing AS_FilterStatus and setting the correct filter fields --- .../tools/walkers/mutect/Mutect2.java | 2 +- .../walkers/mutect/filtering/NuMTFilter.java | 2 +- .../variant/GATKVariantContextUtils.java | 12 ++ ...ftAlignAndTrimVariantsIntegrationTest.java | 22 +++ .../expected_split_with_AS_filters.vcf | 137 ++++++++++++++++++ .../test_split_with_AS_filters.vcf | 120 +++++++++++++++ 6 files changed, 293 insertions(+), 2 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2.java index ca29da8bb56..2340397fc23 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2.java @@ -141,7 +141,7 @@ * gatk Mutect2 \ * -R reference.fa \ * -L chrM \ - * --mitochondria \ + * --mitochondria-mode \ * -I mitochondria.bam \ * -O mitochondria.vcf.gz * diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java index b8bf6566d2c..a7f70ea66de 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java @@ -16,7 +16,7 @@ public class NuMTFilter extends HardAlleleFilter { private final int maxAltDepthCutoff; public NuMTFilter(final double 
medianAutosomalCoverage, final double maxNuMTCopies){ - if (maxNuMTCopies > 0) { + if (maxNuMTCopies > 0 && medianAutosomalCoverage > 0) { final PoissonDistribution autosomalCoverage = new PoissonDistribution(medianAutosomalCoverage * maxNuMTCopies / 2.0); maxAltDepthCutoff = autosomalCoverage.inverseCumulativeProbability(1 - LOWER_BOUND_PROB); } else { diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index 7253716a138..c4792e597c9 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -17,6 +17,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.tools.walkers.genotyper.*; import org.broadinstitute.hellbender.utils.*; import org.broadinstitute.hellbender.utils.param.ParamUtils; @@ -1444,6 +1445,17 @@ public static List splitVariantContextToBiallelics(final Variant } } + // split allele specific filters + int index = vc.getAlleleIndex(alt); + String asfiltersStr = vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, VCFConstants.EMPTY_INFO_FIELD); + List filtersList = AnnotationUtils.decodeAnyASListWithPrintDelim(asfiltersStr); + if (filtersList.size() > index) { + String filters = filtersList.get(index); + if (filters != null && !filters.isEmpty() && !filters.equals(VCFConstants.EMPTY_INFO_FIELD) && !filters.equals((VCFConstants.PASSES_FILTERS_v4))) { + AnnotationUtils.decodeAnyASList(filters).stream().map(String::trim).forEach(filter -> builder.filter(filter)); + } + } + // subset INFO field annotations if available if genotype is called if (genotypeAssignmentMethodUsed 
!= GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS && genotypeAssignmentMethodUsed != GenotypeAssignmentMethod.SET_TO_NO_CALL) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java index 16b1ab3a236..4890186870f 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java @@ -7,6 +7,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.File; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; @@ -64,4 +65,25 @@ public void testLefAlignRequireReference(Path inputFile) throws IOException { ); spec.executeTest("testLeftAlignment--requireReference", this); } + + @Test + public void testSplitAllelesWithASFilters() throws IOException { + Path inputFile = testDataDir.resolve("test_split_with_AS_filters.vcf"); + final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta"); + + Path expectedOutputFile = testDataDir.resolve("expected_split_with_AS_filters.vcf"); + + final IntegrationTestSpec spec = new IntegrationTestSpec( + " -R " + MITO_REF.getAbsolutePath() + + " -V " + inputFile + + " -O %s" + + " --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE + " false" + + " --suppress-reference-path " + + " --" + LeftAlignAndTrimVariants.SPLIT_MULTIALLELEICS_LONG_NAME + + " --" + LeftAlignAndTrimVariants.KEEP_ORIGINAL_AC_LONG_NAME, + Collections.singletonList(expectedOutputFile.toString()) + ); + spec.executeTest("testLeftAlignment--" + expectedOutputFile.toString(), this); + + } } diff --git 
a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf new file mode 100644 index 00000000000..7575032baeb --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf @@ -0,0 +1,137 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##MutectVersion=2.2 +##contig= +##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. +##reference=Homo_sapiens_assembly38.mt_only +##source=FilterMutectCalls +##tumor_sample=01C05110 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 01C05110 +chrM 73 . A G . PASS AS_FilterStatus=.|PASS;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31148.48 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 +chrM 263 . A G . PASS AS_FilterStatus=.|PASS;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21726.37 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 +chrM 301 . A ACCC . 
blacklisted_site;weak_evidence AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:4327,161:6.241e-03,0.040:5021:1395,51,226:1765,86,234:905,3422,3,691 +chrM 301 . A ACC . blacklisted_site AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:4327,533:6.241e-03,0.040:5021:1395,51,226:1765,86,234:905,3422,3,691 +chrM 302 . A AC . blacklisted_site AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0/0/0/0:114,218:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 302 . A C . blacklisted_site AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0/0/0/0:114,786:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 302 . A ACC . blacklisted_site AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1/0/0/0:114,2335:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 302 . A ACCC . blacklisted_site AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/1/0/0:114,396:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 302 . A ACCCC . blacklisted_site;possible_numt;weak_evidence AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/1/0:114,25:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 302 . A ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/0/1:114,433:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 310 . T C . base_qual;blacklisted_site AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:1,402:0.059,0.941:4598:1,65,1356:0,82,1919:0,1,561,4036 +chrM 310 . T TC . blacklisted_site AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:1,4195:0.059,0.941:4598:1,65,1356:0,82,1919:0,1,561,4036 +chrM 499 . G A . 
PASS AS_FilterStatus=.|PASS;DP=8754;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24401.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8443:1.000:8453:4,3064:2,4108:2,8,3991,4452 +chrM 750 . A G . PASS AS_FilterStatus=.|PASS;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34194.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 +chrM 824 . T C . possible_numt AS_FilterStatus=.|possible_numt;DP=10560;ECNT=3;MBQ=30,30;MFRL=398,401;MMQ=60,60;MPOS=43;OCM=0;POPAF=2.40;TLOD=17.27 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10240,44:3.272e-03:10284:4925,18:5163,23:5090,5150,20,24 +chrM 827 . A G . PASS AS_FilterStatus=.|PASS;DP=10567;ECNT=3;MBQ=20,30;MFRL=387,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34471.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10279:1.000:10286:1,4951:4,5144:2,5,5119,5160 +chrM 1438 . A G . PASS AS_FilterStatus=.|PASS;DP=10800;ECNT=1;MBQ=20,30;MFRL=416,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35249.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10514:1.000:10518:0,5098:2,5234:0,4,5322,5192 +chrM 2706 . A G . PASS AS_FilterStatus=.|PASS;DP=10768;ECNT=1;MBQ=10,30;MFRL=406,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34760.84 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3,10503:1.000:10506:1,5177:0,5071:2,1,5027,5476 +chrM 3380 . G A . PASS AS_FilterStatus=.|PASS;DP=10769;ECNT=1;MBQ=30,30;MFRL=396,398;MMQ=60,60;MPOS=39;OCM=0;POPAF=2.40;TLOD=638.42 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10004,438:0.041:10442:5044,209:4684,218:5228,4776,226,212 +chrM 3547 . A G . PASS AS_FilterStatus=.|PASS;DP=10358;ECNT=4;MBQ=30,30;MFRL=425,394;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34395.11 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:4,10119:1.000:10123:1,4799:3,4869:0|1:3547_A_G:3547:0,4,4781,5338 +chrM 3552 . T C . PASS AS_FilterStatus=.|PASS;DP=10345;ECNT=4;MBQ=30,30;MFRL=394,391;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=174.68 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9885,184:0.016:10069:4740,89:4936,83:0|1:3547_A_G:3547:4667,5218,89,95 +chrM 3565 . A C . 
base_qual AS_FilterStatus=.|base_qual;DP=10330;ECNT=4;MBQ=30,10;MFRL=394,401;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=24.76 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9441,463:7.375e-03:9904:3959,41:4613,20:0|1:3547_A_G:3547:4647,4794,10,453 +chrM 3577 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=10292;ECNT=4;MBQ=30,10;MFRL=395,394;MMQ=60,60;MPOS=42;OCM=0;POPAF=2.40;TLOD=2.78 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9730,268:2.480e-03:9998:3662,30:4408,8:0|1:3547_A_G:3547:4405,5325,240,28 +chrM 4769 . A G . PASS AS_FilterStatus=.|PASS;DP=10474;ECNT=2;MBQ=20,30;MFRL=263,401;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33487.32 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10170:1.000:10179:2,5123:3,4727:5,4,5260,4910 +chrM 4820 . G A . PASS AS_FilterStatus=.|PASS;DP=10478;ECNT=2;MBQ=30,30;MFRL=397,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33895.28 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10237:1.000:10242:3,5326:1,4775:3,2,5561,4676 +chrM 4977 . T C . PASS AS_FilterStatus=.|PASS;DP=10552;ECNT=1;MBQ=20,30;MFRL=290,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33801.64 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,10283:1.000:10289:3,5136:3,4901:2,4,4601,5682 +chrM 5629 . C T . PASS AS_FilterStatus=.|PASS;DP=10567;ECNT=1;MBQ=30,30;MFRL=396,402;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=293.78 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10034,242:0.022:10276:5041,137:4759,100:5266,4768,128,114 +chrM 6473 . C T . PASS AS_FilterStatus=.|PASS;DP=10740;ECNT=1;MBQ=20,30;MFRL=419,395;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34747.10 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:8,10485:1.000:10493:3,5076:1,5098:6,2,5308,5177 +chrM 6722 . G A . PASS AS_FilterStatus=.|PASS;DP=10644;ECNT=1;MBQ=20,30;MFRL=219,399;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34924.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10369:1.000:10374:3,5154:2,5065:3,2,5257,5112 +chrM 7028 . C T . 
PASS AS_FilterStatus=.|PASS;DP=10855;ECNT=1;MBQ=10,30;MFRL=400,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35080.38 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10548:1.000:10555:2,5363:0,5048:2,5,5252,5296 +chrM 7241 . A G . PASS AS_FilterStatus=.|PASS;DP=10661;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33416.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10346:1.000:10353:3,5191:0,4856:2,5,5084,5262 +chrM 8270 . CACCCCCTCT C . PASS AS_FilterStatus=.|PASS;DP=9886;ECNT=1;MBQ=30,30;MFRL=419,406;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;RPA=2,1;RU=ACCCCCTCT;STR;TLOD=26845.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:32,8936:0.999:8968:14,4376:12,4326:24,8,4147,4789 +chrM 8736 . T C . PASS AS_FilterStatus=.|PASS;DP=10790;ECNT=1;MBQ=30,30;MFRL=399,400;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=2958.33 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9045,1453:0.137:10498:4635,731:4237,699:4550,4495,778,675 +chrM 8860 . A G . PASS AS_FilterStatus=.|PASS;DP=10877;ECNT=1;MBQ=30,30;MFRL=382,402;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=35049.07 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:11,10573:1.000:10584:4,5291:6,4983:5,6,5415,5158 +chrM 9098 . T C . PASS AS_FilterStatus=.|PASS;DP=10803;ECNT=1;MBQ=20,30;MFRL=413,396;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35204.32 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10527:1.000:10534:3,5357:2,4995:6,1,5173,5354 +chrM 9950 . T C . PASS AS_FilterStatus=.|PASS;DP=10681;ECNT=1;MBQ=30,30;MFRL=419,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34440.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10427:1.000:10432:2,5059:1,5128:1,4,4725,5702 +chrM 10818 . AAC A . possible_numt AS_FilterStatus=.|possible_numt;DP=10642;ECNT=1;MBQ=30,30;MFRL=398,413;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;RPA=2,1;RU=AC;STR;TLOD=124.41 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10048,73:7.193e-03:10121:5053,34:4549,37:4903,5145,38,35 +chrM 11177 . C T . PASS AS_FilterStatus=.|PASS;DP=10719;ECNT=1;MBQ=20,30;MFRL=402,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34441.45 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10433:1.000:10438:1,5387:2,4875:3,2,5589,4844 +chrM 11276 . 
T C . PASS AS_FilterStatus=.|PASS;DP=10644;ECNT=1;MBQ=30,30;MFRL=396,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=1691.31 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9410,943:0.089:10353:4797,486:4475,427:4746,4664,480,463 +chrM 11719 . G A . PASS AS_FilterStatus=.|PASS;DP=10841;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34853.42 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10560:1.000:10565:0,5342:1,4913:4,1,5153,5407 +chrM 13590 . G A . PASS AS_FilterStatus=.|PASS;DP=10710;ECNT=2;MBQ=25,30;MFRL=420,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34042.44 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10439:1.000:10443:2,5111:1,4846:3,1,5146,5293 +chrM 13606 . A G . PASS AS_FilterStatus=.|PASS;DP=10682;ECNT=2;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=405.26 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10056,330:0.030:10386:5060,168:4774,151:5057,4999,175,155 +chrM 14766 . C T . PASS AS_FilterStatus=.|PASS;DP=10635;ECNT=1;MBQ=10,30;MFRL=404,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33444.52 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:13,10322:1.000:10335:2,5039:0,4691:7,6,5582,4740 +chrM 15326 . A G . PASS AS_FilterStatus=.|PASS;DP=10698;ECNT=1;MBQ=15,30;MFRL=393,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34380.43 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10432:1.000:10436:1,5344:1,4710:2,2,5077,5355 +chrM 15535 . C T . PASS AS_FilterStatus=.|PASS;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33908.94 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 +chrM 16149 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=5406;ECNT=8;MBQ=30,10;MFRL=396,396;MMQ=60,60;MPOS=49;OCM=0;POPAF=2.40;TLOD=4.29 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5023,175:3.797e-03:5198:2065,10:2653,12:1731,3292,5,170 +chrM 16175 . A C . 
base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=4056;ECNT=8;MBQ=25,10;MFRL=396,401;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=5.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3668,191:6.736e-03:3859:1180,16:2025,12:903,2765,6,185 +chrM 16179 . CAA C . blacklisted_site AC=1;AF=0.250;AN=4 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0:1476,647:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 +chrM 16179 . CA C . blacklisted_site AC=1;AF=0.250;AN=4 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0:1476,308:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 +chrM 16179 . CAAA C . blacklisted_site AC=1;AF=0.250;AN=4 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1:1476,264:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 +chrM 16181 . A C . base_qual AS_FilterStatus=.|base_qual;DP=3715;ECNT=8;MBQ=20,10;MFRL=396,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=67.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:2614,721:0.159:3335:698,60:1276,245:662,1952,27,694 +chrM 16182 . A C . blacklisted_site AS_FilterStatus=.|PASS;DP=3647;ECNT=8;MBQ=20,30;MFRL=396,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2961.98 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1200,1477:0.561:2677:230,442:606,872:245,955,137,1340 +chrM 16183 . A C . PASS AC=1;AF=0.167;AN=6 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0/0/0:97,1775:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 +chrM 16183 . A ACCC . PASS AC=1;AF=0.167;AN=6 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0/0/0:97,413:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 +chrM 16183 . A ACCCC . PASS AC=1;AF=0.167;AN=6 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1/0/0:97,244:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 +chrM 16183 . A ACCCCC . possible_numt;weak_evidence AC=1;AF=0.167;AN=6 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/1/0:97,50:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 +chrM 16183 . 
A ACCCCCCCCCCCCCCCC . possible_numt AC=1;AF=0.167;AN=6 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/1:97,69:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 +chrM 16189 . T C . PASS AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:3,3422:0.997,1.389e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 +chrM 16189 . T A . possible_numt;weak_evidence AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:3,6:0.997,1.389e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 +chrM 16217 . T C . PASS AS_FilterStatus=.|PASS;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14889.73 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 +chrM 16519 . T C . PASS AS_FilterStatus=.|PASS;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30690.93 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf new file mode 100644 index 00000000000..213c376eb03 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf @@ -0,0 +1,120 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##GATKCommandLine= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= 
+##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##MutectVersion=2.2 +##contig= +##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. +##source=FilterMutectCalls +##source=Mutect2 +##source=VariantFiltration +##tumor_sample=01C05110 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 01C05110 +chrM 73 . A G . PASS AS_FilterStatus=.|PASS;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31148.48 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 +chrM 263 . A G . PASS AS_FilterStatus=.|PASS;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21726.37 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 +chrM 301 . A ACCC,ACC . blacklisted_site AS_FilterStatus=.|weak_evidence|PASS;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.81,70.60 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:4327,161,533:6.241e-03,0.040:5021:1395,51,226:1765,86,234:905,3422,3,691 +chrM 302 . A AC,C,ACC,ACCC,ACCCC,ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=.|PASS|PASS|PASS|PASS|weak_evidence,possible_numt|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3/4/5/6:114,218,786,2335,396,25,433:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 310 . T C,TC . blacklisted_site AS_FilterStatus=.|base_qual|PASS;DP=5034;ECNT=4;MBQ=30,2,30;MFRL=169,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=105.51,12181.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:1,402,4195:0.059,0.941:4598:1,65,1356:0,82,1919:0,1,561,4036 +chrM 499 . G A . 
PASS AS_FilterStatus=.|PASS;DP=8754;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24401.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8443:1.000:8453:4,3064:2,4108:2,8,3991,4452 +chrM 750 . A G . PASS AS_FilterStatus=.|PASS;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34194.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 +chrM 824 . T C . possible_numt AS_FilterStatus=.|possible_numt;DP=10560;ECNT=3;MBQ=30,30;MFRL=398,401;MMQ=60,60;MPOS=43;OCM=0;POPAF=2.40;TLOD=17.27 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10240,44:3.272e-03:10284:4925,18:5163,23:5090,5150,20,24 +chrM 827 . A G . PASS AS_FilterStatus=.|PASS;DP=10567;ECNT=3;MBQ=20,30;MFRL=387,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34471.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10279:1.000:10286:1,4951:4,5144:2,5,5119,5160 +chrM 1438 . A G . PASS AS_FilterStatus=.|PASS;DP=10800;ECNT=1;MBQ=20,30;MFRL=416,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35249.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10514:1.000:10518:0,5098:2,5234:0,4,5322,5192 +chrM 2706 . A G . PASS AS_FilterStatus=.|PASS;DP=10768;ECNT=1;MBQ=10,30;MFRL=406,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34760.84 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3,10503:1.000:10506:1,5177:0,5071:2,1,5027,5476 +chrM 3380 . G A . PASS AS_FilterStatus=.|PASS;DP=10769;ECNT=1;MBQ=30,30;MFRL=396,398;MMQ=60,60;MPOS=39;OCM=0;POPAF=2.40;TLOD=638.42 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10004,438:0.041:10442:5044,209:4684,218:5228,4776,226,212 +chrM 3547 . A G . PASS AS_FilterStatus=.|PASS;DP=10358;ECNT=4;MBQ=30,30;MFRL=425,394;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34395.11 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:4,10119:1.000:10123:1,4799:3,4869:0|1:3547_A_G:3547:0,4,4781,5338 +chrM 3552 . T C . PASS AS_FilterStatus=.|PASS;DP=10345;ECNT=4;MBQ=30,30;MFRL=394,391;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=174.68 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9885,184:0.016:10069:4740,89:4936,83:0|1:3547_A_G:3547:4667,5218,89,95 +chrM 3565 . A C . 
base_qual AS_FilterStatus=.|base_qual;DP=10330;ECNT=4;MBQ=30,10;MFRL=394,401;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=24.76 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9441,463:7.375e-03:9904:3959,41:4613,20:0|1:3547_A_G:3547:4647,4794,10,453 +chrM 3577 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=10292;ECNT=4;MBQ=30,10;MFRL=395,394;MMQ=60,60;MPOS=42;OCM=0;POPAF=2.40;TLOD=2.78 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9730,268:2.480e-03:9998:3662,30:4408,8:0|1:3547_A_G:3547:4405,5325,240,28 +chrM 4769 . A G . PASS AS_FilterStatus=.|PASS;DP=10474;ECNT=2;MBQ=20,30;MFRL=263,401;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33487.32 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10170:1.000:10179:2,5123:3,4727:5,4,5260,4910 +chrM 4820 . G A . PASS AS_FilterStatus=.|PASS;DP=10478;ECNT=2;MBQ=30,30;MFRL=397,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33895.28 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10237:1.000:10242:3,5326:1,4775:3,2,5561,4676 +chrM 4977 . T C . PASS AS_FilterStatus=.|PASS;DP=10552;ECNT=1;MBQ=20,30;MFRL=290,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33801.64 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,10283:1.000:10289:3,5136:3,4901:2,4,4601,5682 +chrM 5629 . C T . PASS AS_FilterStatus=.|PASS;DP=10567;ECNT=1;MBQ=30,30;MFRL=396,402;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=293.78 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10034,242:0.022:10276:5041,137:4759,100:5266,4768,128,114 +chrM 6473 . C T . PASS AS_FilterStatus=.|PASS;DP=10740;ECNT=1;MBQ=20,30;MFRL=419,395;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34747.10 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:8,10485:1.000:10493:3,5076:1,5098:6,2,5308,5177 +chrM 6722 . G A . PASS AS_FilterStatus=.|PASS;DP=10644;ECNT=1;MBQ=20,30;MFRL=219,399;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34924.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10369:1.000:10374:3,5154:2,5065:3,2,5257,5112 +chrM 7028 . C T . 
PASS AS_FilterStatus=.|PASS;DP=10855;ECNT=1;MBQ=10,30;MFRL=400,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35080.38 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10548:1.000:10555:2,5363:0,5048:2,5,5252,5296 +chrM 7241 . A G . PASS AS_FilterStatus=.|PASS;DP=10661;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33416.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10346:1.000:10353:3,5191:0,4856:2,5,5084,5262 +chrM 8270 . CACCCCCTCT C . PASS AS_FilterStatus=.|PASS;DP=9886;ECNT=1;MBQ=30,30;MFRL=419,406;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;RPA=2,1;RU=ACCCCCTCT;STR;TLOD=26845.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:32,8936:0.999:8968:14,4376:12,4326:24,8,4147,4789 +chrM 8736 . T C . PASS AS_FilterStatus=.|PASS;DP=10790;ECNT=1;MBQ=30,30;MFRL=399,400;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=2958.33 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9045,1453:0.137:10498:4635,731:4237,699:4550,4495,778,675 +chrM 8860 . A G . PASS AS_FilterStatus=.|PASS;DP=10877;ECNT=1;MBQ=30,30;MFRL=382,402;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=35049.07 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:11,10573:1.000:10584:4,5291:6,4983:5,6,5415,5158 +chrM 9098 . T C . PASS AS_FilterStatus=.|PASS;DP=10803;ECNT=1;MBQ=20,30;MFRL=413,396;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35204.32 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10527:1.000:10534:3,5357:2,4995:6,1,5173,5354 +chrM 9950 . T C . PASS AS_FilterStatus=.|PASS;DP=10681;ECNT=1;MBQ=30,30;MFRL=419,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34440.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10427:1.000:10432:2,5059:1,5128:1,4,4725,5702 +chrM 10818 . AAC A . possible_numt AS_FilterStatus=.|possible_numt;DP=10642;ECNT=1;MBQ=30,30;MFRL=398,413;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;RPA=2,1;RU=AC;STR;TLOD=124.41 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10048,73:7.193e-03:10121:5053,34:4549,37:4903,5145,38,35 +chrM 11177 . C T . PASS AS_FilterStatus=.|PASS;DP=10719;ECNT=1;MBQ=20,30;MFRL=402,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34441.45 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10433:1.000:10438:1,5387:2,4875:3,2,5589,4844 +chrM 11276 . 
T C . PASS AS_FilterStatus=.|PASS;DP=10644;ECNT=1;MBQ=30,30;MFRL=396,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=1691.31 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9410,943:0.089:10353:4797,486:4475,427:4746,4664,480,463 +chrM 11719 . G A . PASS AS_FilterStatus=.|PASS;DP=10841;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34853.42 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10560:1.000:10565:0,5342:1,4913:4,1,5153,5407 +chrM 13590 . G A . PASS AS_FilterStatus=.|PASS;DP=10710;ECNT=2;MBQ=25,30;MFRL=420,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34042.44 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10439:1.000:10443:2,5111:1,4846:3,1,5146,5293 +chrM 13606 . A G . PASS AS_FilterStatus=.|PASS;DP=10682;ECNT=2;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=405.26 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10056,330:0.030:10386:5060,168:4774,151:5057,4999,175,155 +chrM 14766 . C T . PASS AS_FilterStatus=.|PASS;DP=10635;ECNT=1;MBQ=10,30;MFRL=404,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33444.52 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:13,10322:1.000:10335:2,5039:0,4691:7,6,5582,4740 +chrM 15326 . A G . PASS AS_FilterStatus=.|PASS;DP=10698;ECNT=1;MBQ=15,30;MFRL=393,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34380.43 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10432:1.000:10436:1,5344:1,4710:2,2,5077,5355 +chrM 15535 . C T . PASS AS_FilterStatus=.|PASS;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33908.94 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 +chrM 16149 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=5406;ECNT=8;MBQ=30,10;MFRL=396,396;MMQ=60,60;MPOS=49;OCM=0;POPAF=2.40;TLOD=4.29 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5023,175:3.797e-03:5198:2065,10:2653,12:1731,3292,5,170 +chrM 16175 . A C . 
base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=4056;ECNT=8;MBQ=25,10;MFRL=396,401;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=5.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3668,191:6.736e-03:3859:1180,16:2025,12:903,2765,6,185 +chrM 16179 . CAAA CA,CAA,C . blacklisted_site AS_FilterStatus=.|PASS|PASS|PASS;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=917.43,324.91,75.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3:1476,647,308,264:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 +chrM 16181 . A C . base_qual AS_FilterStatus=.|base_qual;DP=3715;ECNT=8;MBQ=20,10;MFRL=396,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=67.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:2614,721:0.159:3335:698,60:1276,245:662,1952,27,694 +chrM 16182 . A C . blacklisted_site AS_FilterStatus=.|PASS;DP=3647;ECNT=8;MBQ=20,30;MFRL=396,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2961.98 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1200,1477:0.561:2677:230,442:606,872:245,955,137,1340 +chrM 16183 . A C,ACCC,ACCCC,ACCCCC,ACCCCCCCCCCCCCCCC . PASS AS_FilterStatus=.|PASS|PASS|PASS|weak_evidence,possible_numt|possible_numt;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3/4/5:97,1775,413,244,50,69:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 +chrM 16189 . T C,A . PASS AS_FilterStatus=.|PASS|weak_evidence,possible_numt;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14620.12,0.990 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:3,3422,6:0.997,1.389e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 +chrM 16217 . T C . 
PASS AS_FilterStatus=.|PASS;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14889.73 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 +chrM 16519 . T C . PASS AS_FilterStatus=.|PASS;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30690.93 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 From 2cb9b5c09307e5783d18ff713148e7bf662672a4 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 30 Jan 2020 15:52:01 -0500 Subject: [PATCH 38/85] fix extra spaces in filter list --- .../variant/GATKVariantContextUtils.java | 7 +++- .../expected_split_with_AS_filters.vcf | 40 +++++++++---------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index c4792e597c9..6f9b93ca9e0 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -1447,13 +1447,16 @@ public static List splitVariantContextToBiallelics(final Variant // split allele specific filters int index = vc.getAlleleIndex(alt); - String asfiltersStr = vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, VCFConstants.EMPTY_INFO_FIELD); + // the reason we are getting as list and then joining on , is because the default getAttributeAsString for a list will add spaces between items which we don't + // want to have to trim out later in the code + String asfiltersStr = String.join(",", vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_FILTER_STATUS_KEY, VCFConstants.EMPTY_INFO_FIELD)); List filtersList = AnnotationUtils.decodeAnyASListWithPrintDelim(asfiltersStr); if (filtersList.size() > index) { String filters = filtersList.get(index); if (filters != 
null && !filters.isEmpty() && !filters.equals(VCFConstants.EMPTY_INFO_FIELD) && !filters.equals((VCFConstants.PASSES_FILTERS_v4))) { - AnnotationUtils.decodeAnyASList(filters).stream().map(String::trim).forEach(filter -> builder.filter(filter)); + AnnotationUtils.decodeAnyASList(filters).stream().forEach(filter -> builder.filter(filter)); } + builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AnnotationUtils.encodeAnyASList(new ArrayList<>(Arrays.asList(filtersList.get(0), filters)))); } // subset INFO field annotations if available if genotype is called diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf index 7575032baeb..8a6c3919373 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf @@ -76,16 +76,16 @@ #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 01C05110 chrM 73 . A G . PASS AS_FilterStatus=.|PASS;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31148.48 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 chrM 263 . A G . PASS AS_FilterStatus=.|PASS;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21726.37 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 -chrM 301 . A ACCC . blacklisted_site;weak_evidence AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:4327,161:6.241e-03,0.040:5021:1395,51,226:1765,86,234:905,3422,3,691 -chrM 301 . A ACC . 
blacklisted_site AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:4327,533:6.241e-03,0.040:5021:1395,51,226:1765,86,234:905,3422,3,691 -chrM 302 . A AC . blacklisted_site AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0/0/0/0:114,218:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 302 . A C . blacklisted_site AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0/0/0/0:114,786:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 302 . A ACC . blacklisted_site AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1/0/0/0:114,2335:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 302 . A ACCC . blacklisted_site AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/1/0/0:114,396:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 302 . A ACCCC . blacklisted_site;possible_numt;weak_evidence AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/1/0:114,25:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 302 . A ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AC=1;AF=0.143;AN=7 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/0/1:114,433:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 310 . T C . base_qual;blacklisted_site AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:1,402:0.059,0.941:4598:1,65,1356:0,82,1919:0,1,561,4036 -chrM 310 . T TC . blacklisted_site AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:1,4195:0.059,0.941:4598:1,65,1356:0,82,1919:0,1,561,4036 +chrM 301 . A ACCC . blacklisted_site;weak_evidence AC=1;AF=0.333;AN=3;AS_FilterStatus=.|weak_evidence GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:4327,161:6.241e-03,0.040:5021:1395,51,226:1765,86,234:905,3422,3,691 +chrM 301 . 
A ACC . blacklisted_site AC=1;AF=0.333;AN=3;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:4327,533:6.241e-03,0.040:5021:1395,51,226:1765,86,234:905,3422,3,691 +chrM 302 . A AC . blacklisted_site AC=1;AF=0.143;AN=7;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0/0/0/0:114,218:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 302 . A C . blacklisted_site AC=1;AF=0.143;AN=7;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0/0/0/0:114,786:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 302 . A ACC . blacklisted_site AC=1;AF=0.143;AN=7;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1/0/0/0:114,2335:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 302 . A ACCC . blacklisted_site AC=1;AF=0.143;AN=7;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/1/0/0:114,396:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 302 . A ACCCC . blacklisted_site;possible_numt;weak_evidence AC=1;AF=0.143;AN=7;AS_FilterStatus=.|weak_evidence,possible_numt GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/1/0:114,25:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 302 . A ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AC=1;AF=0.143;AN=7;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/0/1:114,433:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 +chrM 310 . T C . base_qual;blacklisted_site AC=1;AF=0.333;AN=3;AS_FilterStatus=.|base_qual GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:1,402:0.059,0.941:4598:1,65,1356:0,82,1919:0,1,561,4036 +chrM 310 . T TC . 
blacklisted_site AC=1;AF=0.333;AN=3;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:1,4195:0.059,0.941:4598:1,65,1356:0,82,1919:0,1,561,4036 chrM 499 . G A . PASS AS_FilterStatus=.|PASS;DP=8754;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24401.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8443:1.000:8453:4,3064:2,4108:2,8,3991,4452 chrM 750 . A G . PASS AS_FilterStatus=.|PASS;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34194.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 chrM 824 . T C . possible_numt AS_FilterStatus=.|possible_numt;DP=10560;ECNT=3;MBQ=30,30;MFRL=398,401;MMQ=60,60;MPOS=43;OCM=0;POPAF=2.40;TLOD=17.27 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10240,44:3.272e-03:10284:4925,18:5163,23:5090,5150,20,24 @@ -121,17 +121,17 @@ chrM 15326 . A G . PASS AS_FilterStatus=.|PASS;DP=10698;ECNT=1;MBQ=15,30;MFRL=39 chrM 15535 . C T . PASS AS_FilterStatus=.|PASS;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33908.94 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 chrM 16149 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=5406;ECNT=8;MBQ=30,10;MFRL=396,396;MMQ=60,60;MPOS=49;OCM=0;POPAF=2.40;TLOD=4.29 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5023,175:3.797e-03:5198:2065,10:2653,12:1731,3292,5,170 chrM 16175 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=4056;ECNT=8;MBQ=25,10;MFRL=396,401;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=5.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3668,191:6.736e-03:3859:1180,16:2025,12:903,2765,6,185 -chrM 16179 . CAA C . blacklisted_site AC=1;AF=0.250;AN=4 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0:1476,647:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 -chrM 16179 . CA C . blacklisted_site AC=1;AF=0.250;AN=4 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0:1476,308:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 -chrM 16179 . CAAA C . 
blacklisted_site AC=1;AF=0.250;AN=4 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1:1476,264:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 +chrM 16179 . CAA C . blacklisted_site AC=1;AF=0.250;AN=4;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0:1476,647:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 +chrM 16179 . CA C . blacklisted_site AC=1;AF=0.250;AN=4;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0:1476,308:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 +chrM 16179 . CAAA C . blacklisted_site AC=1;AF=0.250;AN=4;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1:1476,264:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 chrM 16181 . A C . base_qual AS_FilterStatus=.|base_qual;DP=3715;ECNT=8;MBQ=20,10;MFRL=396,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=67.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:2614,721:0.159:3335:698,60:1276,245:662,1952,27,694 chrM 16182 . A C . blacklisted_site AS_FilterStatus=.|PASS;DP=3647;ECNT=8;MBQ=20,30;MFRL=396,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2961.98 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1200,1477:0.561:2677:230,442:606,872:245,955,137,1340 -chrM 16183 . A C . PASS AC=1;AF=0.167;AN=6 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0/0/0:97,1775:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 -chrM 16183 . A ACCC . PASS AC=1;AF=0.167;AN=6 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0/0/0:97,413:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 -chrM 16183 . A ACCCC . PASS AC=1;AF=0.167;AN=6 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1/0/0:97,244:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 -chrM 16183 . A ACCCCC . possible_numt;weak_evidence AC=1;AF=0.167;AN=6 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/1/0:97,50:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 -chrM 16183 . A ACCCCCCCCCCCCCCCC . 
possible_numt AC=1;AF=0.167;AN=6 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/1:97,69:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 -chrM 16189 . T C . PASS AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:3,3422:0.997,1.389e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 -chrM 16189 . T A . possible_numt;weak_evidence AC=1;AF=0.333;AN=3 GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:3,6:0.997,1.389e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 +chrM 16183 . A C . PASS AC=1;AF=0.167;AN=6;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0/0/0:97,1775:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 +chrM 16183 . A ACCC . PASS AC=1;AF=0.167;AN=6;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0/0/0:97,413:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 +chrM 16183 . A ACCCC . PASS AC=1;AF=0.167;AN=6;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1/0/0:97,244:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 +chrM 16183 . A ACCCCC . possible_numt;weak_evidence AC=1;AF=0.167;AN=6;AS_FilterStatus=.|weak_evidence,possible_numt GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/1/0:97,50:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 +chrM 16183 . A ACCCCCCCCCCCCCCCC . possible_numt AC=1;AF=0.167;AN=6;AS_FilterStatus=.|possible_numt GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/1:97,69:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 +chrM 16189 . T C . PASS AC=1;AF=0.333;AN=3;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:3,3422:0.997,1.389e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 +chrM 16189 . T A . possible_numt;weak_evidence AC=1;AF=0.333;AN=3;AS_FilterStatus=.|weak_evidence,possible_numt GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:3,6:0.997,1.389e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 chrM 16217 . T C . 
PASS AS_FilterStatus=.|PASS;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14889.73 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 chrM 16519 . T C . PASS AS_FilterStatus=.|PASS;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30690.93 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 From 53d31a05ae43f99ecdbf39b43fee82132f7f79c9 Mon Sep 17 00:00:00 2001 From: Laura Gauthier Date: Fri, 31 Jan 2020 13:13:48 -0500 Subject: [PATCH 39/85] Fixed AF and SB splitting; also some javadoc I should have done in the past and some refactoring I should have done in the past --- .../tools/genomicsdb/GenomicsDBUtils.java | 2 +- ...ferenceConfidenceVariantContextMerger.java | 194 +---------------- .../walkers/annotator/AnnotationUtils.java | 19 +- .../allelespecific/AS_RMSMappingQuality.java | 2 +- .../allelespecific/AS_RankSumTest.java | 4 +- .../allelespecific/StrandBiasUtils.java | 43 +++- .../genotyper/AlleleSubsettingUtils.java | 204 +++++++++++++++++- .../MTLowHeteroplasmyFilterTool.java | 14 +- .../filtering/MappingQualityFilter.java | 6 +- .../filtering/Mutect2FilteringEngine.java | 8 +- .../LeftAlignAndTrimVariants.java | 15 +- .../walkers/variantutils/ReblockGVCF.java | 9 +- .../utils/variant/GATKVCFHeaderLines.java | 1 + .../variant/GATKVariantContextUtils.java | 60 ++++-- ...onfidenceVariantContextMergerUnitTest.java | 114 +--------- .../AS_BaseQualityRankSumTestUnitTest.java | 2 +- .../AlleleSubsettingUtilsUnitTest.java | 110 ++++++++++ .../mutect/Mutect2IntegrationTest.java | 2 +- .../expected_split_with_AS_filters.vcf | 40 ++-- 19 files changed, 475 insertions(+), 374 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBUtils.java index aa5e45da91e..34dd8471b65 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBUtils.java @@ -238,7 +238,7 @@ public static GenomicsDBVidMapProto.VidMappingPB updateAlleleSpecificINFOFieldCo infoBuilder.addLength(lengthDescriptorComponentBuilder.build()); lengthDescriptorComponentBuilder.setVariableLengthDescriptor("var"); //ignored - can set anything here infoBuilder.addLength(lengthDescriptorComponentBuilder.build()); - infoBuilder.addVcfDelimiter(AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM); + infoBuilder.addVcfDelimiter(AnnotationUtils.ALLELE_SPECIFIC_RAW_DELIM); infoBuilder.addVcfDelimiter(AnnotationUtils.ALLELE_SPECIFIC_REDUCED_DELIM); if (newCombineOperation.equals(HISTOGRAM_SUM)) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java index 91a7981c40c..0dcb15ebc3a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java @@ -1,20 +1,17 @@ package org.broadinstitute.hellbender.tools.walkers; import com.google.common.annotations.VisibleForTesting; -import com.google.common.primitives.Doubles; -import com.google.common.primitives.Ints; import htsjdk.samtools.util.Locatable; import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFConstants; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLineCount; import htsjdk.variant.vcf.VCFInfoHeaderLine; -import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; import 
org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AlleleSpecificAnnotationData; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotationData; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator; +import org.broadinstitute.hellbender.tools.walkers.genotyper.AlleleSubsettingUtils; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators; import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.Mutect2FilteringEngine; import org.broadinstitute.hellbender.utils.Utils; @@ -35,10 +32,10 @@ @SuppressWarnings({"rawtypes","unchecked"}) //TODO fix uses of untyped Comparable. public final class ReferenceConfidenceVariantContextMerger { - private final GenotypeLikelihoodCalculators calculators; + private static final GenotypeLikelihoodCalculators calculators = new GenotypeLikelihoodCalculators(); private static VCFHeader vcfInputHeader = null; protected final VariantAnnotatorEngine annotatorEngine; - private final boolean doSomaticMerge; + protected final boolean doSomaticMerge; protected boolean dropSomaticFilteringAnnotations; protected final OneShotLogger oneShotAnnotationLogger = new OneShotLogger(this.getClass()); protected final OneShotLogger oneShotHeaderLineLogger = new OneShotLogger(this.getClass()); @@ -64,7 +61,6 @@ public ReferenceConfidenceVariantContextMerger(VariantAnnotatorEngine engine, fi public ReferenceConfidenceVariantContextMerger(VariantAnnotatorEngine engine, final VCFHeader inputHeader, boolean somaticInput, boolean dropSomaticFilteringAnnotations) { Utils.nonNull(inputHeader, "A VCF header must be provided"); - calculators = new GenotypeLikelihoodCalculators(); annotatorEngine = engine; vcfInputHeader = inputHeader; doSomaticMerge = somaticInput; @@ -536,16 +532,16 @@ private GenotypesContext mergeRefConfidenceGenotypes(final VariantContext vc, if (!doSomaticMerge) { if (g.hasPL()) { // lazy initialization of 
the genotype index map by ploidy. - int[] perSampleIndexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, vc.getStart(), g); + int[] perSampleIndexesOfRelevantAlleles = AlleleSubsettingUtils.getIndexesOfRelevantAllelesForGVCF(remappedAlleles, targetAlleles, vc.getStart(), g, false); final int[] genotypeIndexMapByPloidy = genotypeIndexMapsByPloidy[ploidy] == null ? calculators.getInstance(ploidy, maximumAlleleCount).genotypeIndexMap(perSampleIndexesOfRelevantAlleles, calculators) //probably horribly slow : genotypeIndexMapsByPloidy[ploidy]; final int[] PLs = generatePL(g, genotypeIndexMapByPloidy); - final int[] AD = g.hasAD() ? generateAD(g.getAD(), perSampleIndexesOfRelevantAlleles) : null; + final int[] AD = g.hasAD() ? AlleleSubsettingUtils.generateAD(g.getAD(), perSampleIndexesOfRelevantAlleles) : null; genotypeBuilder.PL(PLs).AD(AD); } } - else { + else { //doSomaticMerge genotypeBuilder.noAttributes(); if (g.hasDP()) { genotypeBuilder.DP(g.getDP()); @@ -558,11 +554,11 @@ private GenotypesContext mergeRefConfidenceGenotypes(final VariantContext vc, } // lazy initialization of the genotype index map by ploidy. 
- int[] perSampleIndexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, vc.getStart(), g); + int[] perSampleIndexesOfRelevantAlleles = AlleleSubsettingUtils.getIndexesOfRelevantAllelesForGVCF(remappedAlleles, targetAlleles, vc.getStart(), g, false); final int nonRefIndex = remappedAlleles.indexOf(Allele.NON_REF_ALLELE); final int[] AD; if (g.hasAD()) { - AD = generateAD(g.getAD(), perSampleIndexesOfRelevantAlleles); + AD = AlleleSubsettingUtils.generateAD(g.getAD(), perSampleIndexesOfRelevantAlleles); genotypeBuilder.AD(AD); } else if (g.hasDP()) { AD = new int[targetAlleles.size()]; @@ -570,7 +566,7 @@ private GenotypesContext mergeRefConfidenceGenotypes(final VariantContext vc, genotypeBuilder.AD(AD); } if (g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY)) { //homRef calls don't have AF - final double[] AF = generateAF(VariantContextGetters.getAttributeAsDoubleArray(g, GATKVCFConstants.ALLELE_FRACTION_KEY, () -> new double[]{0.0}, 0.0), perSampleIndexesOfRelevantAlleles); + final double[] AF = AlleleSubsettingUtils.generateAF(VariantContextGetters.getAttributeAsDoubleArray(g, GATKVCFConstants.ALLELE_FRACTION_KEY, () -> new double[]{0.0}, 0.0), perSampleIndexesOfRelevantAlleles); genotypeBuilder.attribute(GATKVCFConstants.ALLELE_FRACTION_KEY, AF); } else if ((g.isHomRef() || g.isNoCall()) && vc.getAlternateAlleles().size() == 1) { //homRef blocks don't get an AF so assign it here; multi-sample GVCFs will have no-call GTs for ref blocks @@ -654,122 +650,6 @@ private static int[] generatePL(final Genotype g, final int[] genotypeIndexMapBy return PLs; } - /** - * Determines the allele mapping from myAlleles to the targetAlleles, substituting the generic "" as appropriate. - * If the myAlleles set does not contain "" as an allele, it throws an exception. 
- * - * @param remappedAlleles the list of alleles to evaluate - * @param targetAlleles the target list of alleles - * @param position position to output error info - * @param g genotype from which targetAlleles are derived - * @return non-null array of ints representing indexes - */ - @VisibleForTesting - int[] getIndexesOfRelevantAlleles(final List remappedAlleles, final List targetAlleles, final int position, final Genotype g) { - - Utils.nonEmpty(remappedAlleles); - Utils.nonEmpty(targetAlleles); - - if ( !remappedAlleles.contains(Allele.NON_REF_ALLELE) ) { - throw new UserException("The list of input alleles must contain " + Allele.NON_REF_ALLELE + " as an allele but that is not the case at position " + position + "; please use the Haplotype Caller with gVCF output to generate appropriate records"); - } - - final int indexOfNonRef = remappedAlleles.indexOf(Allele.NON_REF_ALLELE); - final int[] indexMapping = new int[targetAlleles.size()]; - - // the reference likelihoods should always map to each other (even if the alleles don't) - indexMapping[0] = 0; - - // create the index mapping, using the allele whenever such a mapping doesn't exist - for ( int i = 1; i < targetAlleles.size(); i++ ) { - // if there's more than 1 DEL allele then we need to use the best one - if (targetAlleles.get(i) == Allele.SPAN_DEL && !doSomaticMerge && g.hasPL()) { - final int occurrences = Collections.frequency(remappedAlleles, Allele.SPAN_DEL); - if (occurrences > 1) { - final int indexOfBestDel = indexOfBestDel(remappedAlleles, g.getPL(), g.getPloidy()); - indexMapping[i] = (indexOfBestDel == -1 ? indexOfNonRef : indexOfBestDel); - continue; - } - } - - final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAlleles.get(i)); - indexMapping[i] = indexOfRemappedAllele == -1 ? 
indexOfNonRef : indexOfRemappedAllele; - } - - return indexMapping; - } - - /** - * Returns the index of the best spanning deletion allele based on AD counts - * - * @param alleles the list of alleles - * @param PLs the list of corresponding PL values - * @param ploidy the ploidy of the sample - * @return the best index or -1 if not found - */ - private int indexOfBestDel(final List alleles, final int[] PLs, final int ploidy) { - int bestIndex = -1; - int bestPL = Integer.MAX_VALUE; - - for ( int i = 0; i < alleles.size(); i++ ) { - if ( alleles.get(i) == Allele.SPAN_DEL ) { - final int homAltIndex = findHomIndex(i, ploidy, alleles.size()); - final int PL = PLs[homAltIndex]; - if ( PL < bestPL ) { - bestIndex = i; - bestPL = PL; - } - } - } - - return bestIndex; - } - - /** //TODO simplify these methods - * Returns the index of the PL that represents the homozygous genotype of the given i'th allele - * - * @param i the index of the allele with the list of alleles - * @param ploidy the ploidy of the sample - * @param numAlleles the total number of alleles - * @return the hom index - */ - private int findHomIndex(final int i, final int ploidy, final int numAlleles) { - // some quick optimizations for the common case - if ( ploidy == 2 ) - return GenotypeLikelihoods.calculatePLindex(i, i); - if ( ploidy == 1 ) - return i; - - final GenotypeLikelihoodCalculator calculator = calculators.getInstance(ploidy, numAlleles); - final int[] alleleIndexes = new int[ploidy]; - Arrays.fill(alleleIndexes, i); - return calculator.allelesToIndex(alleleIndexes); - } - - /** - * Generates a new AD array by adding zeros for missing alleles given the set of indexes of the Genotype's current - * alleles from the original AD. 
- * - * @param originalAD the original AD to extend - * @param indexesOfRelevantAlleles the indexes of the original alleles corresponding to the new alleles - * @return non-null array of new AD values - */ - public static int[] generateAD(final int[] originalAD, final int[] indexesOfRelevantAlleles) { - final List adList = (List)remapRLengthList(Arrays.stream(originalAD).boxed().collect(Collectors.toList()), indexesOfRelevantAlleles); - return Ints.toArray(adList); - } - - /** - * Generates a new AF (allele fraction) array - * @param originalAF - * @param indexesOfRelevantAlleles - * @return non-null array of new AFs - */ - public static double[] generateAF(final double[] originalAF, final int[] indexesOfRelevantAlleles) { - final List afList = (List)remapALengthList(Arrays.stream(originalAF).boxed().collect(Collectors.toList()), indexesOfRelevantAlleles); - return Doubles.toArray(afList); - } - /** * Generates a new annotation value array by adding zeros for missing alleles given the set of indexes of the Genotype's current * alleles from the original annotation value array. 
@@ -783,65 +663,13 @@ public static Object generateAnnotationValueVector(VCFHeaderLineCount alleleCoun final List originalList, final int[] indexesOfRelevantAlleles) { List newLODs = null; if (alleleCount.equals(VCFHeaderLineCount.A)) { - newLODs = remapALengthList(originalList, indexesOfRelevantAlleles); + newLODs = AlleleSubsettingUtils.remapALengthList(originalList, indexesOfRelevantAlleles); } else if (alleleCount.equals(VCFHeaderLineCount.R)) { - newLODs = remapRLengthList(originalList, indexesOfRelevantAlleles); + newLODs = AlleleSubsettingUtils.remapRLengthList(originalList, indexesOfRelevantAlleles); } else { //count doesn't depend on alleles newLODs = originalList; } return newLODs; } - /** - * Given a list of per-allele attributes including the reference allele, subset to relevant alleles - * @param originalList - * @param indexesOfRelevantAlleles - * @return - */ - public static List remapRLengthList(final List originalList, final int[] indexesOfRelevantAlleles) { - Utils.nonNull(originalList); - Utils.nonNull(indexesOfRelevantAlleles); - - return remapList(originalList, indexesOfRelevantAlleles, 0); - } - - /** - * Given a list of per-alt-allele attributes, subset to relevant alt alleles - * @param originalList - * @param indexesOfRelevantAlleles - * @return - */ - public static List remapALengthList(final List originalList, final int[] indexesOfRelevantAlleles) { - Utils.nonNull(originalList); - Utils.nonNull(indexesOfRelevantAlleles); - - return remapList(originalList, indexesOfRelevantAlleles, 1); - } - - /** - * Subset a list of per-allele attributes - * - * @param originalList input per-allele attributes - * @param indexesOfRelevantAlleles indexes of alleles to keep, including the reference - * @param offset used to indicate whether to include the ref allele values in the output or not - * @return a non-null List - */ - private static List remapList(final List originalList, final int[] indexesOfRelevantAlleles, - final int offset) { - final int 
numValues = indexesOfRelevantAlleles.length - offset; //since these are log odds, this should just be alts - final List newValues = new ArrayList<>(); - - //force attributes for the non-ref to go to zero, even though that allele occasionally picks up AD counts - final int filler = 0; - - for ( int i = offset; i < numValues + offset; i++ ) { - final int oldIndex = indexesOfRelevantAlleles[i]; - if ( oldIndex >= originalList.size() + offset ) { - newValues.add(i-offset, filler); - } else { - newValues.add(i-offset, originalList.get(oldIndex-offset)); - } - } - return newValues; - } } \ No newline at end of file diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java index 892b45a22d6..e9549cf5f2c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AnnotationUtils.java @@ -5,7 +5,7 @@ import java.util.*; public final class AnnotationUtils { - public static final String ALLELE_SPECIFIC_PRINT_DELIM = "|"; + public static final String ALLELE_SPECIFIC_RAW_DELIM = "|"; public static final String ALLELE_SPECIFIC_REDUCED_DELIM = ","; public static final String ALLELE_SPECIFIC_SPLIT_REGEX = "\\|"; //String.split takes a regex, so we need to escape the pipe public static final String BRACKET_REGEX = "\\[|\\]"; @@ -36,16 +36,21 @@ public static String encodeStringList( final List stringList) { } /** - * Helper function to convert a List of Strings to a pipe-separated String, as for raw annotations + * Helper function to convert a List of Strings to a {@value ALLELE_SPECIFIC_RAW_DELIM}-separated String, as for raw annotations * @param somethingList the ArrayList with String data - * @return a pipe-separated String + * @return a delimited String */ - public static String encodeAnyASList( final List somethingList) { - return
StringUtils.join(somethingList, ALLELE_SPECIFIC_PRINT_DELIM).replaceAll(BRACKET_REGEX, ""); //Who actually wants brackets at the ends of their string? Who??? + public static String encodeAnyASListWithRawDelim(final List somethingList) { + return StringUtils.join(somethingList, ALLELE_SPECIFIC_RAW_DELIM).replaceAll(BRACKET_REGEX, ""); //Who actually wants brackets at the ends of their string? Who??? } - public static List decodeAnyASListWithPrintDelim( final String somethingList) { - return Arrays.asList(StringUtils.splitByWholeSeparatorPreserveAllTokens(somethingList.replaceAll(BRACKET_REGEX, ""), ALLELE_SPECIFIC_PRINT_DELIM)); + /** + * Helper method to split a "raw" annotation string delimited with {@value ALLELE_SPECIFIC_RAW_DELIM} + * @param somethingList a String, possibly read from a VCF + * @return a List of Strings + */ + public static List decodeAnyASListWithRawDelim(final String somethingList) { + return Arrays.asList(StringUtils.splitByWholeSeparatorPreserveAllTokens(somethingList.replaceAll(BRACKET_REGEX, ""), ALLELE_SPECIFIC_RAW_DELIM)); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RMSMappingQuality.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RMSMappingQuality.java index 8ed31f340c2..8f7b9597fca 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RMSMappingQuality.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_RMSMappingQuality.java @@ -216,7 +216,7 @@ private String makeRawAnnotationString(final List vcAlleles, final Map vcAlleles, final Map for (int i = 0; i< vcAlleles.size(); i++) { if (!vcAlleles.get(i).isReference()) { if (i != 0) { //strings will always start with a printDelim because we won't have values for the reference allele, but keep this for consistency with other annotations - annotationString += 
AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM; + annotationString += AnnotationUtils.ALLELE_SPECIFIC_RAW_DELIM; } final Double alleleValue = perAlleleValues.get(vcAlleles.get(i)); //can be null if there are no ref reads @@ -300,7 +300,7 @@ protected String makeCombinedAnnotationString(final List vcAlleles, fina for (int i = 0; i< vcAlleles.size(); i++) { if (!vcAlleles.get(i).isReference()) { if (i != 0) { //strings will always start with a printDelim because we won't have values for the reference allele, but keep this for consistency with other annotations - annotationString += AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM; + annotationString += AnnotationUtils.ALLELE_SPECIFIC_RAW_DELIM; } final Histogram alleleValue = perAlleleValues.get(vcAlleles.get(i)); //can be null if there are no ref reads diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java index d8e6a4d3e30..942dd45e998 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java @@ -2,7 +2,6 @@ import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; -import org.broadinstitute.hellbender.engine.filters.VariantFilter; import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; import org.broadinstitute.hellbender.utils.read.GATKRead; @@ -19,7 +18,7 @@ public class StrandBiasUtils { public static Map computeSBAnnotation(VariantContext vc, AlleleLikelihoods likelihoods, String key) { // calculate the annotation from the likelihoods - // likelihoods can come from HaplotypeCaller call to VariantAnnotatorEngine + // likelihoods can come from 
HaplotypeCaller or Mutect2 call to VariantAnnotatorEngine final Map annotations = new HashMap<>(); final ReducibleAnnotationData> myData = new AlleleSpecificAnnotationData<>(vc.getAlleles(),null); getStrandCountsFromLikelihoodMap(vc, likelihoods, myData, MIN_COUNT); @@ -30,13 +29,19 @@ public static Map computeSBAnnotation(VariantContext vc, AlleleL return annotations; } + /** + * Helper method to output raw allele-specific strand counts as a string + * @param vcAlleles relevant alleles + * @param perAlleleValues forward and reverse read counts for each allele + * @return a String appropriate to use for annotating a GVCF + */ protected static String makeRawAnnotationString(final List vcAlleles, final Map> perAlleleValues) { final List alleleStrings = vcAlleles.stream() // does not replace a null value with zero list - only if the key is not in the map .map(a -> perAlleleValues.getOrDefault(a, ZERO_LIST)) .map(StrandBiasUtils::encode) .collect(Collectors.toList()); - return String.join(AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM, alleleStrings); + return String.join(AnnotationUtils.ALLELE_SPECIFIC_RAW_DELIM, alleleStrings); } @@ -46,11 +51,17 @@ protected static String encode(List alleleValues) { /** - Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this: - * fw rc + Allocate and fill a Nx2 strand contingency table where N is the number of alleles. 
In the end, it'll look something like this: + * fwd rev * allele1 # # * allele2 # # - * @return a 2x2 contingency table + * + * NOTE:Only use informative reads + * + * @param vc VariantContext from which to get alleles + * @param likelihoods per-read allele likelihoods to determine if each read is informative + * @param perAlleleValues modified to store the output counts + * @param minCount minimum threshold of counts to use */ public static void getStrandCountsFromLikelihoodMap( final VariantContext vc, final AlleleLikelihoods likelihoods, @@ -74,6 +85,11 @@ public static void getStrandCountsFromLikelihoodMap( final VariantContext vc, } } + /** + * Combine allele-specific data from two ReducibleAnnotationData data structures + * @param toAdd input values + * @param combined modified to return the combined values + */ protected static void combineAttributeMap(final ReducibleAnnotationData> toAdd, final ReducibleAnnotationData> combined) { for (final Allele a : combined.getAlleles()) { if (toAdd.hasAttribute(a) && toAdd.getAttribute(a) != null) { @@ -91,6 +107,14 @@ protected static void combineAttributeMap(final ReducibleAnnotationData allAlts, final ReducibleAnnotationData> perAlleleValues) { final boolean matchesRef = bestAllele.equals(ref, true); @@ -131,12 +155,17 @@ protected static boolean passesMinimumThreshold(final ReducibleAnnotationData
  • > getSBsForAlleles(VariantContext vc) { String sbStr = vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_SB_TABLE_KEY, null); if (sbStr == null || sbStr.isEmpty()) { return Collections.emptyList(); } - List asb = AnnotationUtils.decodeAnyASListWithPrintDelim(sbStr); + List asb = AnnotationUtils.decodeAnyASListWithRawDelim(sbStr); return asb.stream() .map(fwdrev -> AnnotationUtils.decodeAnyASList(fwdrev).stream().map(String::trim) .mapToInt(Integer::parseInt).boxed().collect(Collectors.toList())).collect(Collectors.toList()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java index 24d7c8db937..c0e0a28c5ce 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java @@ -1,9 +1,12 @@ package org.broadinstitute.hellbender.tools.walkers.genotyper; import com.google.common.annotations.VisibleForTesting; +import com.google.common.primitives.Doubles; +import com.google.common.primitives.Ints; import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.*; import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.walkers.ReferenceConfidenceVariantContextMerger; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.Utils; @@ -140,7 +143,7 @@ public static GenotypesContext subsetSomaticAlleles(final VCFHeader outputHeader } } gb.alleles(keepGTAlleles); - gb.AD(ReferenceConfidenceVariantContextMerger.generateAD(g.getAD(), relevantIndices)); + gb.AD(generateAD(g.getAD(), relevantIndices)); Set keys = g.getExtendedAttributes().keySet(); for (final String key : keys) { final VCFFormatHeaderLine headerLine = 
outputHeader.getFormatHeaderLine(key); @@ -384,4 +387,203 @@ public static int[] subsettedPLIndices(final int ploidy, final List orig } return result; } + + /** + * Determines the allele mapping from remappedAlleles to the targetAlleles, substituting the generic "<NON_REF>" as appropriate. + * If the remappedAlleles set does not contain "<NON_REF>" as an allele, it throws an exception. + * + * @param remappedAlleles the list of alleles to evaluate + * @param targetAlleles the target list of alleles + * @param position position to output error info + * @param g genotype from which targetAlleles are derived + * @return non-null array of ints representing indexes + */ + public static int[] getIndexesOfRelevantAllelesForGVCF(final List remappedAlleles, final List targetAlleles, final int position, final Genotype g, final boolean doSomaticMerge) { + + Utils.nonEmpty(remappedAlleles); + Utils.nonEmpty(targetAlleles); + + if ( !remappedAlleles.contains(Allele.NON_REF_ALLELE) ) { + throw new UserException("The list of input alleles must contain " + Allele.NON_REF_ALLELE + " as an allele but that is not the case at position " + position + "; please use the Haplotype Caller with gVCF output to generate appropriate records"); + } + + final int indexOfNonRef = remappedAlleles.indexOf(Allele.NON_REF_ALLELE); + final int[] indexMapping = new int[targetAlleles.size()]; + + // the reference likelihoods should always map to each other (even if the alleles don't) + indexMapping[0] = 0; + + // create the index mapping, using the <NON_REF> allele whenever such a mapping doesn't exist + for ( int i = 1; i < targetAlleles.size(); i++ ) { + // if there's more than 1 spanning deletion (*) allele then we need to use the best one + if (targetAlleles.get(i) == Allele.SPAN_DEL && !doSomaticMerge && g.hasPL()) { + final int occurrences = Collections.frequency(remappedAlleles, Allele.SPAN_DEL); + if (occurrences > 1) { + final int indexOfBestDel = indexOfBestDel(remappedAlleles, g.getPL(), g.getPloidy()); + indexMapping[i] =
(indexOfBestDel == -1 ? indexOfNonRef : indexOfBestDel); + continue; + } + } + + final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAlleles.get(i)); + indexMapping[i] = indexOfRemappedAllele == -1 ? indexOfNonRef : indexOfRemappedAllele; + } + + return indexMapping; + } + + public static int[] getIndexesOfRelevantAlleles(final List remappedAlleles, final List targetAlleles, final int position, final Genotype g) { + Utils.nonEmpty(remappedAlleles); + Utils.nonEmpty(targetAlleles); + + final int[] indexMapping = new int[targetAlleles.size()]; + + // the reference likelihoods should always map to each other (even if the alleles don't) + indexMapping[0] = 0; + + for ( int i = 1; i < targetAlleles.size(); i++ ) { + // if there's more than 1 spanning deletion (*) allele then we need to use the best one + if (targetAlleles.get(i) == Allele.SPAN_DEL && g.hasPL()) { + final int occurrences = Collections.frequency(remappedAlleles, Allele.SPAN_DEL); + if (occurrences > 1) { + final int indexOfBestDel = indexOfBestDel(remappedAlleles, g.getPL(), g.getPloidy()); + if (indexOfBestDel == -1) { + throw new IllegalArgumentException("At position " + position + " targetAlleles contains a spanning deletion, but remappedAlleles does not."); + } + indexMapping[i] = indexOfBestDel; + continue; + } + } + + final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAlleles.get(i)); + if (indexOfRemappedAllele == -1) { + throw new IllegalArgumentException("At position " + position + " targetAlleles contains a " + targetAlleles.get(i) + " allele, but remappedAlleles does not."); + } + indexMapping[i] = indexOfRemappedAllele; + } + + return indexMapping; + } + + /** + * Returns the index of the best spanning deletion allele based on PL values (the spanning deletion with the lowest homozygous PL) + * + * @param alleles the list of alleles + * @param PLs the list of corresponding PL values + * @param ploidy the ploidy of the sample + * @return the best index or -1 if not found + */ + private static int indexOfBestDel(final List
alleles, final int[] PLs, final int ploidy) { + int bestIndex = -1; + int bestPL = Integer.MAX_VALUE; + + for ( int i = 0; i < alleles.size(); i++ ) { + if ( alleles.get(i) == Allele.SPAN_DEL ) { + final int homAltIndex = findHomIndex(GL_CALCS.getInstance(ploidy, alleles.size()), i, ploidy); + final int PL = PLs[homAltIndex]; + if ( PL < bestPL ) { + bestIndex = i; + bestPL = PL; + } + } + } + + return bestIndex; + } + + /** //TODO simplify these methods + * Returns the index of the PL that represents the homozygous genotype of the given i'th allele + * + * @param i the index of the allele with the list of alleles + * @param ploidy the ploidy of the sample + * @return the hom index + */ + private static int findHomIndex(final GenotypeLikelihoodCalculator calculator, final int i, final int ploidy) { + // some quick optimizations for the common case + if ( ploidy == 2 ) + return GenotypeLikelihoods.calculatePLindex(i, i); + if ( ploidy == 1 ) + return i; + + final int[] alleleIndexes = new int[ploidy]; + Arrays.fill(alleleIndexes, i); + return calculator.allelesToIndex(alleleIndexes); + } + + /** + * Generates a new AD array by adding zeros for missing alleles given the set of indexes of the Genotype's current + * alleles from the original AD. 
+ * + * @param originalAD the original AD to extend + * @param indexesOfRelevantAlleles the indexes of the original alleles corresponding to the new alleles + * @return non-null array of new AD values + */ + public static int[] generateAD(final int[] originalAD, final int[] indexesOfRelevantAlleles) { + final List adList = (List)remapRLengthList(Arrays.stream(originalAD).boxed().collect(Collectors.toList()), indexesOfRelevantAlleles); + return Ints.toArray(adList); + } + + /** + * Generates a new AF (allele fraction) array + * @param originalAF + * @param indexesOfRelevantAlleles + * @return non-null array of new AFs + */ + public static double[] generateAF(final double[] originalAF, final int[] indexesOfRelevantAlleles) { + final List afList = (List)remapALengthList(Arrays.stream(originalAF).boxed().collect(Collectors.toList()), indexesOfRelevantAlleles); + return Doubles.toArray(afList); + } + + /** + * Given a list of per-allele attributes including the reference allele, subset to relevant alleles + * @param originalList + * @param indexesOfRelevantAlleles + * @return + */ + public static List remapRLengthList(final List originalList, final int[] indexesOfRelevantAlleles) { + Utils.nonNull(originalList); + Utils.nonNull(indexesOfRelevantAlleles); + + return remapList(originalList, indexesOfRelevantAlleles, 0); + } + + /** + * Given a list of per-alt-allele attributes, subset to relevant alt alleles + * @param originalList + * @param indexesOfRelevantAlleles + * @return + */ + public static List remapALengthList(final List originalList, final int[] indexesOfRelevantAlleles) { + Utils.nonNull(originalList); + Utils.nonNull(indexesOfRelevantAlleles); + + return remapList(originalList, indexesOfRelevantAlleles, 1); + } + + /** + * Subset a list of per-allele attributes + * + * @param originalList input per-allele attributes + * @param indexesOfRelevantAlleles indexes of alleles to keep, including the reference + * @param offset used to indicate whether to include 
the ref allele values in the output or not + * @return a non-null List + */ + private static List remapList(final List originalList, final int[] indexesOfRelevantAlleles, + final int offset) { + final int numValues = indexesOfRelevantAlleles.length - offset; //since these are log odds, this should just be alts + final List newValues = new ArrayList<>(); + + //force attributes for the non-ref to go to zero, even though that allele occasionally picks up AD counts + final int filler = 0; + + for ( int i = offset; i < numValues + offset; i++ ) { + final int oldIndex = indexesOfRelevantAlleles[i]; + if ( oldIndex >= originalList.size() + offset ) { + newValues.add(i-offset, filler); + } else { + newValues.add(i-offset, originalList.get(oldIndex-offset)); + } + } + return newValues; + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java index 20de0fa057e..1947ad26f96 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java @@ -6,6 +6,7 @@ import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFFilterHeaderLine; import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; @@ -15,11 +16,12 @@ import org.broadinstitute.hellbender.engine.TwoPassVariantWalker; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; import 
picard.cmdline.programgroups.VariantFilteringProgramGroup; import java.io.File; - -import static org.broadinstitute.hellbender.utils.variant.GATKVCFConstants.LOW_HET_FILTER_NAME; +import java.util.HashSet; +import java.util.Set; @CommandLineProgramProperties( summary = "If too many low heteroplasmy sites pass other filters, then filter all low heteroplasmy sites", @@ -53,11 +55,11 @@ public class MTLowHeteroplasmyFilterTool extends TwoPassVariantWalker { @Override public void onTraversalStart() { - final VCFHeader inputHeader = getHeaderForVariants(); - // TODO why isn't it being added in the GATKVCFHeaderLines - inputHeader.addMetaDataLine(new VCFFilterHeaderLine(LOW_HET_FILTER_NAME, "All low heteroplasmy sites are filtered when at least x low het sites pass all other filters")); + final Set headerInfo = new HashSet<>(); + headerInfo.addAll(getHeaderForVariants().getMetaDataInInputOrder()); + headerInfo.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_HET_FILTER_NAME)); vcfWriter = createVCFWriter(new File(outputVcf)); - vcfWriter.writeHeader(inputHeader); + vcfWriter.writeHeader(new VCFHeader(headerInfo)); } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java index 469fa6677fa..7e677c2b9be 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java @@ -22,14 +22,14 @@ public MappingQualityFilter(final double minMedianMappingQuality, final int long public ErrorType errorType() { return ErrorType.ARTIFACT; } @Override - public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { + public List areAllelesArtifacts(final VariantContext vc, final 
Mutect2FilteringEngine filteringEngine, final ReferenceContext referenceContext) { final List indelLengths = vc.getIndelLengths(); // alts only - List mappingQualityByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY, 0); + final List mappingQualityByAllele = vc.getAttributeAsIntList(GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY, 0); // we use the mapping quality annotation of the alt allele in most cases, but for long indels we use the reference // annotation. We have to do this because the indel, even if it maps uniquely, gets a poor mapping quality // by virtue of its mismatch. The reference mapping quality is a decent proxy for the region's mappability. - int refQual = mappingQualityByAllele.remove(0); // get the ref value and convert list to alts only + final int refQual = mappingQualityByAllele.remove(0); // get the ref value and convert list to alts only new IndexRange(0, mappingQualityByAllele.size()).forEach(i -> { if (indelLengths != null && indelLengths.get(i) >= longIndelSize) { mappingQualityByAllele.set(i, refQual); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 7f65f5886af..4902f41f6b8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -198,7 +198,7 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext List orderedASFilterStrings = vc.getAlleles().stream().map(allele -> allele.isReference() || allele.isSymbolic() ? 
VCFConstants.EMPTY_INFO_FIELD : mergedFilterStringByAllele.next()).collect(Collectors.toList()); - String finalAttrString = AnnotationUtils.encodeAnyASList(orderedASFilterStrings); + String finalAttrString = AnnotationUtils.encodeAnyASListWithRawDelim(orderedASFilterStrings); vcb.putAttributes(Collections.singletonMap(GATKVCFConstants.AS_FILTER_STATUS_KEY, finalAttrString)); @@ -253,9 +253,9 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext * @param filtersForAllele all the filters applied to the allele * @return encoded (comma separated) list of filters that apply to the allele */ - private List getDistinctFiltersForAllele(List filtersForAllele) { + private List getDistinctFiltersForAllele(final List filtersForAllele) { // loop through each filter and pull out the filters the specified allele - List results = filtersForAllele.stream().distinct().collect(Collectors.toList()); + final List results = filtersForAllele.stream().distinct().collect(Collectors.toList()); if (results.size() > 1 && results.contains(VCFConstants.PASSES_FILTERS_v4)) { results.remove(VCFConstants.PASSES_FILTERS_v4); } else if (results.isEmpty()) { @@ -272,7 +272,7 @@ private List getDistinctFiltersForAllele(List filtersForAllele) * @param filterName the name of the filter being evaluated * @return List of filtername or "PASS" for each allele */ - private List addFilterStrings(List probabilities, double errorThreshold, String filterName) { + private List addFilterStrings(final List probabilities, final double errorThreshold, final String filterName) { return probabilities.stream().map(value -> value > errorThreshold ? 
filterName : VCFConstants.PASSES_FILTERS_v4).collect(Collectors.toList()); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java index b4154c21705..ce7a3a3fbf6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java @@ -189,7 +189,8 @@ public void onTraversalStart() { final Set actualLines = VcfUtils.updateHeaderContigLines(createVCFHeaderLineList(vcfHeaders), refPath, getReferenceDictionary(), suppressReferencePath); vcfWriter = createVCFWriter(outFile); - vcfWriter.writeHeader(new VCFHeader(actualLines, vcfSamples)); + vcfHeader = new VCFHeader(actualLines, vcfSamples); + vcfWriter.writeHeader(vcfHeader); } /** @@ -217,8 +218,16 @@ private Set createVCFHeaderLineList(Map vcfHea */ @Override public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext ref, FeatureContext featureContext) { - final List vcList = splitMultiallelics ? 
GATKVariantContextUtils.splitVariantContextToBiallelics(vc, false, - GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, keepOriginalChrCounts) : Collections.singletonList(vc); + final List vcList; + if (splitMultiallelics) { + if (vc.getGenotypes().stream().anyMatch(g -> g.hasAnyAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY))) { + vcList = GATKVariantContextUtils.splitSomaticVariantContextToBiallelics(vc, false, vcfHeader); + } else { + vcList = GATKVariantContextUtils.splitVariantContextToBiallelics(vc, false, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, keepOriginalChrCounts); + } + } else { + vcList = Collections.singletonList(vc); + } for (final VariantContext splitVariant : vcList) { final List indelLengths = splitVariant.getIndelLengths(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java index cade9196963..dee13c1d79a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java @@ -10,7 +10,6 @@ import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection; import org.broadinstitute.hellbender.engine.*; import org.broadinstitute.hellbender.exceptions.UserException; -import org.broadinstitute.hellbender.tools.walkers.ReferenceConfidenceVariantContextMerger; import org.broadinstitute.hellbender.tools.walkers.annotator.*; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_QualByDepth; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_StandardAnnotation; @@ -500,9 +499,9 @@ protected VariantContext cleanUpHighQualityVariant(final VariantContext result, if (origMap.containsKey(rawKey)) { if (allelesNeedSubsetting && AnnotationUtils.isAlleleSpecific(annotation)) { List alleleSpecificValues = 
AnnotationUtils.getAlleleLengthListOfString(originalVC.getAttributeAsString(rawKey, null)); - final List subsetList = alleleSpecificValues.size() > 0 ? ReferenceConfidenceVariantContextMerger.remapRLengthList(alleleSpecificValues, relevantIndices) + final List subsetList = alleleSpecificValues.size() > 0 ? AlleleSubsettingUtils.remapRLengthList(alleleSpecificValues, relevantIndices) : Collections.nCopies(relevantIndices.length, ""); - attrMap.put(rawKey, AnnotationUtils.encodeAnyASList(subsetList)); + attrMap.put(rawKey, AnnotationUtils.encodeAnyASListWithRawDelim(subsetList)); } else { attrMap.put(rawKey, origMap.get(rawKey)); } @@ -539,12 +538,12 @@ protected VariantContext cleanUpHighQualityVariant(final VariantContext result, quals.add("0"); } } - attrMap.put(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY, AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM+String.join(AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM, quals)); + attrMap.put(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY, AnnotationUtils.ALLELE_SPECIFIC_RAW_DELIM +String.join(AnnotationUtils.ALLELE_SPECIFIC_RAW_DELIM, quals)); List as_varDP = AS_QualByDepth.getAlleleDepths(AlleleSubsettingUtils.subsetAlleles(result.getGenotypes(), HomoSapiensConstants.DEFAULT_PLOIDY, result.getAlleles(), newAlleleSet, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, result.getAttributeAsInt(VCFConstants.DEPTH_KEY,0))); if (as_varDP != null) { - attrMap.put(GATKVCFConstants.AS_VARIANT_DEPTH_KEY, as_varDP.stream().map( n -> Integer.toString(n)).collect(Collectors.joining(AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM))); + attrMap.put(GATKVCFConstants.AS_VARIANT_DEPTH_KEY, as_varDP.stream().map( n -> Integer.toString(n)).collect(Collectors.joining(AnnotationUtils.ALLELE_SPECIFIC_RAW_DELIM))); } } } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java index ca253cdf0a4..014a0b70759 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java @@ -101,6 +101,7 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf addFilterLine(new VCFFilterHeaderLine(POSSIBLE_NUMT_FILTER_NAME, "Allele depth is below expected coverage of NuMT in autosome")); addFilterLine(new VCFFilterHeaderLine(LOW_HET_FILTER_NAME, "All low heteroplasmy sites are filtered when at least x low het sites pass all other filters")); addFilterLine(new VCFFilterHeaderLine(FAIL, "Fail the site if all alleles fail but for different reasons.")); + addFilterLine(new VCFFilterHeaderLine(LOW_HET_FILTER_NAME, "All low heteroplasmy sites are filtered when at least x low het sites pass all other filters")); addFormatLine(new VCFFormatHeaderLine(ALLELE_BALANCE_KEY, 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); addFormatLine(new VCFFormatHeaderLine(MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY, 1, VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index 6f9b93ca9e0..4fbe6e2cb16 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -7,10 +7,7 @@ import htsjdk.variant.variantcontext.writer.Options; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; -import htsjdk.variant.vcf.VCFConstants; -import htsjdk.variant.vcf.VCFHeaderLine; -import htsjdk.variant.vcf.VCFSimpleHeaderLine; -import htsjdk.variant.vcf.VCFStandardHeaderLines; +import htsjdk.variant.vcf.*; import org.apache.commons.io.FilenameUtils; import 
org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.tuple.MutablePair; @@ -1401,6 +1398,46 @@ public static VariantContext makeFromAlleles(final String name, final String con return new VariantContextBuilder(name, contig, start, start+length-1, alleles).make(); } + public static List splitSomaticVariantContextToBiallelics(final VariantContext vc, final boolean trimLeft, final VCFHeader outputHeader) { + Utils.nonNull(vc); + + if (!vc.isVariant() || vc.isBiallelic()) { + // non variant or biallelics already satisfy the contract + return Collections.singletonList(vc); + } else { + final List biallelics = new LinkedList<>(); + + int altIndex = 1; + for (final Allele alt : vc.getAlternateAlleles()) { + final VariantContextBuilder builder = new VariantContextBuilder(vc); + + // make biallelic alleles + final List alleles = Arrays.asList(vc.getReference(), alt); + builder.alleles(alleles); + + // split allele specific filters + int index = vc.getAlleleIndex(alt); + // the reason we are getting as list and then joining on , is because the default getAttributeAsString for a list will add spaces between items which we don't + // want to have to trim out later in the code + String asfiltersStr = String.join(",", vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_FILTER_STATUS_KEY, VCFConstants.EMPTY_INFO_FIELD)); + List filtersList = AnnotationUtils.decodeAnyASListWithRawDelim(asfiltersStr); + if (filtersList.size() > index) { + String filters = filtersList.get(index); + if (filters != null && !filters.isEmpty() && !filters.equals(VCFConstants.EMPTY_INFO_FIELD) && !filters.equals((VCFConstants.PASSES_FILTERS_v4))) { + AnnotationUtils.decodeAnyASList(filters).stream().forEach(filter -> builder.filter(filter)); + } + builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AnnotationUtils.encodeAnyASListWithRawDelim(new ArrayList<>(Arrays.asList(filtersList.get(0), filters)))); + } + + 
builder.genotypes(AlleleSubsettingUtils.subsetSomaticAlleles(outputHeader, vc.getGenotypes(), alleles, new int[]{0, altIndex})); + final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true); + biallelics.add(trimmed); + altIndex++; + } + return biallelics; + } + } + /** * Split variant context into its biallelic components if there are more than 2 alleles *

    @@ -1445,19 +1482,8 @@ public static List splitVariantContextToBiallelics(final Variant } } - // split allele specific filters - int index = vc.getAlleleIndex(alt); - // the reason we are getting as list and then joining on , is because the default getAttributeAsString for a list will add spaces between items which we don't - // want to have to trim out later in the code - String asfiltersStr = String.join(",", vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_FILTER_STATUS_KEY, VCFConstants.EMPTY_INFO_FIELD)); - List filtersList = AnnotationUtils.decodeAnyASListWithPrintDelim(asfiltersStr); - if (filtersList.size() > index) { - String filters = filtersList.get(index); - if (filters != null && !filters.isEmpty() && !filters.equals(VCFConstants.EMPTY_INFO_FIELD) && !filters.equals((VCFConstants.PASSES_FILTERS_v4))) { - AnnotationUtils.decodeAnyASList(filters).stream().forEach(filter -> builder.filter(filter)); - } - builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AnnotationUtils.encodeAnyASList(new ArrayList<>(Arrays.asList(filtersList.get(0), filters)))); - } + //TODO: split allele-specific filters (which are comma-delimited, as applied by VQSR) + // subset INFO field annotations if available if genotype is called if (genotypeAssignmentMethodUsed != GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS && diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMergerUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMergerUnitTest.java index 595e181a270..78f80f3b7f4 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMergerUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMergerUnitTest.java @@ -5,6 +5,7 @@ import htsjdk.variant.vcf.VCFHeader; import org.broadinstitute.hellbender.exceptions.UserException; import 
org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; +import org.broadinstitute.hellbender.tools.walkers.genotyper.AlleleSubsettingUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.GATKBaseTest; @@ -76,78 +77,16 @@ public void testGetBestDepthValue(final Genotype genotype, final int expectedDep Assert.assertEquals(ReferenceConfidenceVariantContextMerger.getBestDepthValue(genotype), expectedDepth); } - @Test public void testGenerateADWithNewAlleles() { final int[] originalAD = new int[] {1,2,0}; final int[] indexesOfRelevantAlleles = new int[] {0,1,2,2}; - final int[] newAD = ReferenceConfidenceVariantContextMerger.generateAD(originalAD, indexesOfRelevantAlleles); + final int[] newAD = AlleleSubsettingUtils.generateAD(originalAD, indexesOfRelevantAlleles); Assert.assertEquals(newAD, new int[]{1,2,0,0}); } - - @Test(expectedExceptions = UserException.class) - public void testGetIndexesOfRelevantAllelesWithNoALT() { - ReferenceConfidenceVariantContextMerger merger = new ReferenceConfidenceVariantContextMerger(getAnnotationEngine(), new VCFHeader()); - - final List alleles1 = new ArrayList<>(1); - alleles1.add(Allele.create("A", true)); - final List alleles2 = new ArrayList<>(1); - alleles2.add(Allele.create("A", true)); - GenotypeBuilder builder = new GenotypeBuilder(); - merger.getIndexesOfRelevantAlleles(alleles1, alleles2, -1, builder.make()); - Assert.fail("We should have thrown an exception because the allele was not present"); - } - - @Test(dataProvider = "getIndexesOfRelevantAllelesData") - public void testGetIndexesOfRelevantAlleles(final int allelesIndex, final List allAlleles) { - final List myAlleles = new ArrayList<>(3); - ReferenceConfidenceVariantContextMerger merger = new ReferenceConfidenceVariantContextMerger(getAnnotationEngine(), new VCFHeader()); - - // always add the reference and alleles - 
myAlleles.add(allAlleles.get(0)); - myAlleles.add(Allele.NON_REF_ALLELE); - // optionally add another alternate allele - if ( allelesIndex > 0 ) - myAlleles.add(allAlleles.get(allelesIndex)); - - GenotypeBuilder builder = new GenotypeBuilder(); - - final int[] indexes = merger.getIndexesOfRelevantAlleles(myAlleles, allAlleles, -1, builder.make()); - - Assert.assertEquals(indexes.length, allAlleles.size()); - - for ( int i = 0; i < allAlleles.size(); i++ ) { - if ( i == 0 ) - Assert.assertEquals(indexes[i], 0); // ref should always match - else if ( i == allelesIndex ) - Assert.assertEquals(indexes[i], 2); // allele - else - Assert.assertEquals(indexes[i], 1); // - } - } - - // This test asserts that when we us getINdexesOfRelevantAlleles in the case where there are multiple spanning deletions - // that we remap the PL indexes according to the BEST spanning deletion instead of the first one, which can happen if - // there were multiple spanning deletion alleles which are replaced with the same symbolic alleles before being fed to - // referenceConfidenceVariantContextMerger. 
- @Test (dataProvider = "getIndexesOfRelevantAllelesDataSpanningDels") - public void testGetIndexesOfRelevantAllelesMultiSpanningDel(final List allelesToFind, final List allAlleles, final Genotype g, final int expectedIndex) { - ReferenceConfidenceVariantContextMerger merger = new ReferenceConfidenceVariantContextMerger(getAnnotationEngine(), new VCFHeader()); - - final int[] indexes = merger.getIndexesOfRelevantAlleles(allAlleles, allelesToFind,-1, g); - - Assert.assertEquals(indexes.length, allelesToFind.size()); - - // Asserting that the expected index for the spanning deletion allele corresponds to the most likely one according to the PL - Assert.assertEquals(indexes[0], 0); // ref should always match - Assert.assertEquals(indexes[1], expectedIndex); // allele - Assert.assertEquals(indexes[2], 4); // - } - - @DataProvider(name = "referenceConfidenceMergeData") public Object[][] makeReferenceConfidenceMergeData() { final List tests = new ArrayList<>(); @@ -268,55 +207,6 @@ public Object[][] makeReferenceConfidenceMergeData() { return tests.toArray(new Object[][]{}); } - @DataProvider(name = "getIndexesOfRelevantAllelesData") - public Object[][] makeGetIndexesOfRelevantAllelesData() { - final int totalAlleles = 5; - final List alleles = new ArrayList<>(totalAlleles); - alleles.add(Allele.create("A", true)); - for ( int i = 1; i < totalAlleles; i++ ) - alleles.add(Allele.create(Utils.dupChar('A', i + 1), false)); - - final List tests = new ArrayList<>(); - - for ( int alleleIndex = 0; alleleIndex < totalAlleles; alleleIndex++ ) { - tests.add(new Object[]{alleleIndex, alleles}); - } - - return tests.toArray(new Object[][]{}); - } - - @DataProvider(name = "getIndexesOfRelevantAllelesDataSpanningDels") - public Object[][] makeGetIndexesOfRelevantAllelesDataSpanningDels() { - final int totalAlleles = 5; - final List alleles = new ArrayList<>(totalAlleles); - alleles.add(Allele.create("A", true)); - alleles.add(Allele.create("*", false)); - 
alleles.add(Allele.create("*", false)); - alleles.add(Allele.create("*", false)); - alleles.add(Allele.NON_REF_ALLELE); - - final List suballeles = new ArrayList<>(); - suballeles.add(Allele.create("A", true)); - suballeles.add(Allele.create("*", false)); - - Genotype firstAltBest = new GenotypeBuilder("sampleName").alleles(suballeles).PL(new double[]{0, 0, 30, 0, 0, 20, 0, 0, 0, 10, - 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0}).make(); - Genotype secondAltBest = new GenotypeBuilder("sampleName").alleles(suballeles).PL(new double[]{0, 0, 20, 0, 0, 30, 0, 0, 0, 10, - 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0}).make(); - Genotype thirdAltBest = new GenotypeBuilder("sampleName").alleles(suballeles).PL(new double[]{0, 0, 20, 0, 0, 10, 0, 0, 0, 30, - 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0}).make(); - Genotype altsTied = new GenotypeBuilder("sampleName").alleles(suballeles).PL(new double[]{0, 0, 20, 0, 0, 30, 0, 0, 0, 30, - 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0}).make(); - - final List tests = new ArrayList<>(); - - tests.add(new Object[]{alleles.stream().distinct().collect(Collectors.toList()), alleles, firstAltBest, 1}); - tests.add(new Object[]{alleles.stream().distinct().collect(Collectors.toList()), alleles, secondAltBest, 2}); - tests.add(new Object[]{alleles.stream().distinct().collect(Collectors.toList()), alleles, thirdAltBest, 3}); - tests.add(new Object[]{alleles.stream().distinct().collect(Collectors.toList()), alleles, altsTied, 2}); - - return tests.toArray(new Object[][]{}); - } @DataProvider public Object[][] allelesToRemap(){ diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_BaseQualityRankSumTestUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_BaseQualityRankSumTestUnitTest.java index cb19876c25c..0fd555917a5 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_BaseQualityRankSumTestUnitTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/AS_BaseQualityRankSumTestUnitTest.java @@ -90,7 +90,7 @@ public void testBaseQualRawAnnotate() { String secondExpected = String.format("%.1f",Math.round(Math.floor((expectedAlt2.getZ() )/0.1))*0.1); // Note, when we output the raw annotated RankSum score, we output the MannWhitneyU test Z value as a histogram for each alt allele - final String expectedAnnotation = AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM + firstExpected + ",1" + AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM + secondExpected + ",1"; + final String expectedAnnotation = AnnotationUtils.ALLELE_SPECIFIC_RAW_DELIM + firstExpected + ",1" + AnnotationUtils.ALLELE_SPECIFIC_RAW_DELIM + secondExpected + ",1"; final MannWhitneyU.Result annotateResult = mannWhitneyU.test(Stream.concat(Arrays.stream(alt1BaseQuals).boxed(), Arrays.stream(alt2BaseQuals).boxed()).mapToDouble(i->(double)i).toArray(), Arrays.stream(refBaseQuals).asDoubleStream().toArray(), MannWhitneyU.TestType.FIRST_DOMINATES); final double annotateZScore = annotateResult.getZ(); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtilsUnitTest.java index eca0d23fdac..3a02bdfe94a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtilsUnitTest.java @@ -2,9 +2,12 @@ import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFConstants; +import htsjdk.variant.vcf.VCFHeader; +import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; +import 
org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import org.testng.Assert; @@ -15,6 +18,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; public class AlleleSubsettingUtilsUnitTest extends GATKBaseTest { @@ -23,6 +27,112 @@ public class AlleleSubsettingUtilsUnitTest extends GATKBaseTest { private static final Allele C = Allele.create("C"); private static final Allele G = Allele.create("G"); + @DataProvider(name = "getIndexesOfRelevantAllelesData") + public Object[][] makeGetIndexesOfRelevantAllelesData() { + final int totalAlleles = 5; + final List alleles = new ArrayList<>(totalAlleles); + alleles.add(Allele.create("A", true)); + for ( int i = 1; i < totalAlleles; i++ ) + alleles.add(Allele.create(Utils.dupChar('A', i + 1), false)); + + final List tests = new ArrayList<>(); + + for ( int alleleIndex = 0; alleleIndex < totalAlleles; alleleIndex++ ) { + tests.add(new Object[]{alleleIndex, alleles, true}); + tests.add(new Object[]{alleleIndex, alleles, false}); + } + + return tests.toArray(new Object[][]{}); + } + + @DataProvider(name = "getIndexesOfRelevantAllelesDataSpanningDels") + public Object[][] makeGetIndexesOfRelevantAllelesDataSpanningDels() { + final int totalAlleles = 5; + final List alleles = new ArrayList<>(totalAlleles); + alleles.add(Allele.create("A", true)); + alleles.add(Allele.create("*", false)); + alleles.add(Allele.create("*", false)); + alleles.add(Allele.create("*", false)); + alleles.add(Allele.NON_REF_ALLELE); + + final List suballeles = new ArrayList<>(); + suballeles.add(Allele.create("A", true)); + suballeles.add(Allele.create("*", false)); + + Genotype firstAltBest = new GenotypeBuilder("sampleName").alleles(suballeles).PL(new double[]{0, 0, 30, 0, 0, 20, 0, 0, 0, 10, + 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0}).make(); + Genotype 
secondAltBest = new GenotypeBuilder("sampleName").alleles(suballeles).PL(new double[]{0, 0, 20, 0, 0, 30, 0, 0, 0, 10, + 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0}).make(); + Genotype thirdAltBest = new GenotypeBuilder("sampleName").alleles(suballeles).PL(new double[]{0, 0, 20, 0, 0, 10, 0, 0, 0, 30, + 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0}).make(); + Genotype altsTied = new GenotypeBuilder("sampleName").alleles(suballeles).PL(new double[]{0, 0, 20, 0, 0, 30, 0, 0, 0, 30, + 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0}).make(); + + final List tests = new ArrayList<>(); + + tests.add(new Object[]{alleles.stream().distinct().collect(Collectors.toList()), alleles, firstAltBest, 1}); + tests.add(new Object[]{alleles.stream().distinct().collect(Collectors.toList()), alleles, secondAltBest, 2}); + tests.add(new Object[]{alleles.stream().distinct().collect(Collectors.toList()), alleles, thirdAltBest, 3}); + tests.add(new Object[]{alleles.stream().distinct().collect(Collectors.toList()), alleles, altsTied, 2}); + + return tests.toArray(new Object[][]{}); + } + + @Test(expectedExceptions = UserException.class) + public void testGetIndexesOfRelevantAllelesWithNoALT() { + final List alleles1 = new ArrayList<>(1); + alleles1.add(Allele.create("A", true)); + final List alleles2 = new ArrayList<>(1); + alleles2.add(Allele.create("A", true)); + GenotypeBuilder builder = new GenotypeBuilder(); + AlleleSubsettingUtils.getIndexesOfRelevantAllelesForGVCF(alleles1, alleles2, -1, builder.make(), false); + AlleleSubsettingUtils.getIndexesOfRelevantAllelesForGVCF(alleles1, alleles2, -1, builder.make(), true); + } + + @Test(dataProvider = "getIndexesOfRelevantAllelesData") + public void testGetIndexesOfRelevantAlleles(final int allelesIndex, final List allAlleles, final boolean isSomatic) { + final List myAlleles = new ArrayList<>(3); + + // always add the reference and alleles + myAlleles.add(allAlleles.get(0)); + myAlleles.add(Allele.NON_REF_ALLELE); + // optionally add another alternate allele + if ( 
allelesIndex > 0 ) + myAlleles.add(allAlleles.get(allelesIndex)); + + GenotypeBuilder builder = new GenotypeBuilder(); + + final int[] indexes = AlleleSubsettingUtils.getIndexesOfRelevantAllelesForGVCF(myAlleles, allAlleles, -1, builder.make(), isSomatic); + + Assert.assertEquals(indexes.length, allAlleles.size()); + + for ( int i = 0; i < allAlleles.size(); i++ ) { + if ( i == 0 ) + Assert.assertEquals(indexes[i], 0); // ref should always match + else if ( i == allelesIndex ) + Assert.assertEquals(indexes[i], 2); // allele + else + Assert.assertEquals(indexes[i], 1); // + } + } + + // This test asserts that when we us getINdexesOfRelevantAlleles in the case where there are multiple spanning deletions + // that we remap the PL indexes according to the BEST spanning deletion instead of the first one, which can happen if + // there were multiple spanning deletion alleles which are replaced with the same symbolic alleles before being fed to + // referenceConfidenceVariantContextMerger. + @Test (dataProvider = "getIndexesOfRelevantAllelesDataSpanningDels") + public void testGetIndexesOfRelevantAllelesMultiSpanningDel(final List allelesToFind, final List allAlleles, final Genotype g, final int expectedIndex) { + final boolean isSomatic = false; //Mutect2 doesn't output spanning deletions, so that's irrelevant + final int[] indexes = AlleleSubsettingUtils.getIndexesOfRelevantAllelesForGVCF(allAlleles, allelesToFind,-1, g, isSomatic); + + Assert.assertEquals(indexes.length, allelesToFind.size()); + + // Asserting that the expected index for the spanning deletion allele corresponds to the most likely one according to the PL + Assert.assertEquals(indexes[0], 0); // ref should always match + Assert.assertEquals(indexes[1], expectedIndex); // allele + Assert.assertEquals(indexes[2], 4); // + } + @Test(dataProvider = "updatePLsSACsAndADData") public void testUpdatePLsAndADData(final VariantContext originalVC, final VariantContext selectedVC, diff --git 
a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 863210228f3..2c6a4da3f70 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -571,7 +571,7 @@ public void testFilterMitochondria(File unfiltered, final double minAlleleFracti .map(VariantContext::getFilters).collect(Collectors.toList()); final List> actualASFilters = VariantContextTestUtils.streamVcf(filteredVcf) - .map(vc -> AnnotationUtils.decodeAnyASListWithPrintDelim(vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, ""))).collect(Collectors.toList()); + .map(vc -> AnnotationUtils.decodeAnyASListWithRawDelim(vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, ""))).collect(Collectors.toList()); Assert.assertEquals(actualASFilters, expectedASFilters); Assert.assertEquals(actualFilters.size(), expectedFilters.size()); diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf index 8a6c3919373..f75f28963d8 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf @@ -76,16 +76,16 @@ #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 01C05110 chrM 73 . A G . 
PASS AS_FilterStatus=.|PASS;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31148.48 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 chrM 263 . A G . PASS AS_FilterStatus=.|PASS;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21726.37 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 -chrM 301 . A ACCC . blacklisted_site;weak_evidence AC=1;AF=0.333;AN=3;AS_FilterStatus=.|weak_evidence GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:4327,161:6.241e-03,0.040:5021:1395,51,226:1765,86,234:905,3422,3,691 -chrM 301 . A ACC . blacklisted_site AC=1;AF=0.333;AN=3;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:4327,533:6.241e-03,0.040:5021:1395,51,226:1765,86,234:905,3422,3,691 -chrM 302 . A AC . blacklisted_site AC=1;AF=0.143;AN=7;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0/0/0/0:114,218:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 302 . A C . blacklisted_site AC=1;AF=0.143;AN=7;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0/0/0/0:114,786:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 302 . A ACC . blacklisted_site AC=1;AF=0.143;AN=7;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1/0/0/0:114,2335:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 302 . A ACCC . blacklisted_site AC=1;AF=0.143;AN=7;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/1/0/0:114,396:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 302 . A ACCCC . 
blacklisted_site;possible_numt;weak_evidence AC=1;AF=0.143;AN=7;AS_FilterStatus=.|weak_evidence,possible_numt GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/1/0:114,25:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 302 . A ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AC=1;AF=0.143;AN=7;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/0/1:114,433:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 310 . T C . base_qual;blacklisted_site AC=1;AF=0.333;AN=3;AS_FilterStatus=.|base_qual GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:1,402:0.059,0.941:4598:1,65,1356:0,82,1919:0,1,561,4036 -chrM 310 . T TC . blacklisted_site AC=1;AF=0.333;AN=3;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:1,4195:0.059,0.941:4598:1,65,1356:0,82,1919:0,1,561,4036 +chrM 301 . A ACCC . blacklisted_site;weak_evidence AS_FilterStatus=.|weak_evidence;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.81,70.60 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:4327,161:6.241e-03:5021:1395,51:1765,86:905,3422,3,691 +chrM 301 . A ACC . blacklisted_site AS_FilterStatus=.|PASS;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.81,70.60 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:4327,533:0.040:5021:1395,226:1765,234:905,3422,3,691 +chrM 302 . A AC . blacklisted_site AS_FilterStatus=.|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/././././.:114,218:0.047:4307:28,78:24,109:101,13,473,3720 +chrM 302 . A C . 
blacklisted_site AS_FilterStatus=.|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/./././.:114,786:0.132:4307:28,364:24,386:101,13,473,3720 +chrM 302 . A ACC . blacklisted_site AS_FilterStatus=.|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/././.:114,2335:0.619:4307:28,752:24,1245:101,13,473,3720 +chrM 302 . A ACCC . blacklisted_site AS_FilterStatus=.|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/./.:114,396:0.095:4307:28,135:24,195:101,13,473,3720 +chrM 302 . A ACCCC . blacklisted_site;possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,possible_numt;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1/.:114,25:3.908e-03:4307:28,7:24,11:101,13,473,3720 +chrM 302 . A ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=.|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././././1:114,433:0.095:4307:28,153:24,150:101,13,473,3720 +chrM 310 . T C . 
base_qual;blacklisted_site AS_FilterStatus=.|base_qual;DP=5034;ECNT=4;MBQ=30,2,30;MFRL=169,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=105.51,12181.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:1,402:0.059:4598:1,65:0,82:0,1,561,4036 +chrM 310 . T TC . blacklisted_site AS_FilterStatus=.|PASS;DP=5034;ECNT=4;MBQ=30,2,30;MFRL=169,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=105.51,12181.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:1,4195:0.941:4598:1,1356:0,1919:0,1,561,4036 chrM 499 . G A . PASS AS_FilterStatus=.|PASS;DP=8754;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24401.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8443:1.000:8453:4,3064:2,4108:2,8,3991,4452 chrM 750 . A G . PASS AS_FilterStatus=.|PASS;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34194.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 chrM 824 . T C . possible_numt AS_FilterStatus=.|possible_numt;DP=10560;ECNT=3;MBQ=30,30;MFRL=398,401;MMQ=60,60;MPOS=43;OCM=0;POPAF=2.40;TLOD=17.27 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10240,44:3.272e-03:10284:4925,18:5163,23:5090,5150,20,24 @@ -121,17 +121,17 @@ chrM 15326 . A G . PASS AS_FilterStatus=.|PASS;DP=10698;ECNT=1;MBQ=15,30;MFRL=39 chrM 15535 . C T . PASS AS_FilterStatus=.|PASS;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33908.94 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 chrM 16149 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=5406;ECNT=8;MBQ=30,10;MFRL=396,396;MMQ=60,60;MPOS=49;OCM=0;POPAF=2.40;TLOD=4.29 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5023,175:3.797e-03:5198:2065,10:2653,12:1731,3292,5,170 chrM 16175 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=4056;ECNT=8;MBQ=25,10;MFRL=396,401;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=5.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3668,191:6.736e-03:3859:1180,16:2025,12:903,2765,6,185 -chrM 16179 . CAA C . 
blacklisted_site AC=1;AF=0.250;AN=4;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0:1476,647:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 -chrM 16179 . CA C . blacklisted_site AC=1;AF=0.250;AN=4;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0:1476,308:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 -chrM 16179 . CAAA C . blacklisted_site AC=1;AF=0.250;AN=4;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1:1476,264:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 +chrM 16179 . CAA C . blacklisted_site AS_FilterStatus=.|PASS;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=917.43,324.91,75.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./.:1476,647:0.268:2695:461,165:868,424:201,1275,206,1013 +chrM 16179 . CA C . blacklisted_site AS_FilterStatus=.|PASS;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=917.43,324.91,75.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/.:1476,308:0.105:2695:461,117:868,175:201,1275,206,1013 +chrM 16179 . CAAA C . blacklisted_site AS_FilterStatus=.|PASS;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=917.43,324.91,75.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1:1476,264:0.057:2695:461,48:868,189:201,1275,206,1013 chrM 16181 . A C . base_qual AS_FilterStatus=.|base_qual;DP=3715;ECNT=8;MBQ=20,10;MFRL=396,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=67.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:2614,721:0.159:3335:698,60:1276,245:662,1952,27,694 chrM 16182 . A C . blacklisted_site AS_FilterStatus=.|PASS;DP=3647;ECNT=8;MBQ=20,30;MFRL=396,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2961.98 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1200,1477:0.561:2677:230,442:606,872:245,955,137,1340 -chrM 16183 . A C . 
PASS AC=1;AF=0.167;AN=6;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0/0/0/0:97,1775:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 -chrM 16183 . A ACCC . PASS AC=1;AF=0.167;AN=6;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1/0/0/0:97,413:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 -chrM 16183 . A ACCCC . PASS AC=1;AF=0.167;AN=6;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/1/0/0:97,244:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 -chrM 16183 . A ACCCCC . possible_numt;weak_evidence AC=1;AF=0.167;AN=6;AS_FilterStatus=.|weak_evidence,possible_numt GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/1/0:97,50:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 -chrM 16183 . A ACCCCCCCCCCCCCCCC . possible_numt AC=1;AF=0.167;AN=6;AS_FilterStatus=.|possible_numt GT:AD:AF:DP:F1R2:F2R1:SB 0/0/0/0/0/1:97,69:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 -chrM 16189 . T C . PASS AC=1;AF=0.333;AN=3;AS_FilterStatus=.|PASS GT:AD:AF:DP:F1R2:F2R1:SB 0/1/0:3,3422:0.997,1.389e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 -chrM 16189 . T A . possible_numt;weak_evidence AC=1;AF=0.333;AN=3;AS_FilterStatus=.|weak_evidence,possible_numt GT:AD:AF:DP:F1R2:F2R1:SB 0/0/1:3,6:0.997,1.389e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 +chrM 16183 . A C . PASS AS_FilterStatus=.|PASS;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./././.:97,1775:0.701:2648:26,570:31,1047:57,40,305,2246 +chrM 16183 . A ACCC . 
PASS AS_FilterStatus=.|PASS;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/././.:97,413:0.163:2648:26,52:31,278:57,40,305,2246 +chrM 16183 . A ACCCC . PASS AS_FilterStatus=.|PASS;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/./.:97,244:0.101:2648:26,22:31,175:57,40,305,2246 +chrM 16183 . A ACCCCC . possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,possible_numt;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/.:97,50:5.155e-03:2648:26,2:31,35:57,40,305,2246 +chrM 16183 . A ACCCCCCCCCCCCCCCC . possible_numt AS_FilterStatus=.|possible_numt;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1:97,69:9.053e-03:2648:26,19:31,18:57,40,305,2246 +chrM 16189 . T C . PASS AS_FilterStatus=.|PASS;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14620.12,0.990 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:3,3422:0.997:3431:2,1023:1,2014:1,2,601,2827 +chrM 16189 . T A . possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,possible_numt;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14620.12,0.990 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:3,6:1.389e-03:3431:2,2:1,2:1,2,601,2827 chrM 16217 . T C . 
PASS AS_FilterStatus=.|PASS;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14889.73 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 chrM 16519 . T C . PASS AS_FilterStatus=.|PASS;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30690.93 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 From 9446699d1197564c4205e6f42f54d8895118198e Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 3 Feb 2020 15:22:06 -0500 Subject: [PATCH 40/85] fix warning, fixes from PR feedback --- .../AS_StrandBiasMutectAnnotation.java | 3 ++- .../genotyper/AlleleSubsettingUtils.java | 22 +++++++++---------- .../filtering/Mutect2FilteringEngine.java | 5 ++--- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java index 22b5b1def43..9fd00eac708 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java @@ -7,6 +7,7 @@ import org.apache.logging.log4j.Logger; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_StrandBiasTest; +import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AlleleSpecificAnnotation; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AlleleSpecificAnnotationData; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils; import org.broadinstitute.hellbender.utils.Utils; @@ -18,7 +19,7 @@ import java.util.List; import java.util.Map; -public class AS_StrandBiasMutectAnnotation extends InfoFieldAnnotation implements 
StandardMutectAnnotation { +public class AS_StrandBiasMutectAnnotation extends InfoFieldAnnotation implements StandardMutectAnnotation, AlleleSpecificAnnotation { private final static Logger logger = LogManager.getLogger(StrandBiasBySample.class); @Override public Map annotate(ReferenceContext ref, VariantContext vc, AlleleLikelihoods likelihoods) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java index c0e0a28c5ce..e302698b2d8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java @@ -519,7 +519,7 @@ private static int findHomIndex(final GenotypeLikelihoodCalculator calculator, f * @return non-null array of new AD values */ public static int[] generateAD(final int[] originalAD, final int[] indexesOfRelevantAlleles) { - final List adList = (List)remapRLengthList(Arrays.stream(originalAD).boxed().collect(Collectors.toList()), indexesOfRelevantAlleles); + final List adList = remapRLengthList(Arrays.stream(originalAD).boxed().collect(Collectors.toList()), indexesOfRelevantAlleles, 0); return Ints.toArray(adList); } @@ -530,7 +530,7 @@ public static int[] generateAD(final int[] originalAD, final int[] indexesOfRele * @return non-null array of new AFs */ public static double[] generateAF(final double[] originalAF, final int[] indexesOfRelevantAlleles) { - final List afList = (List)remapALengthList(Arrays.stream(originalAF).boxed().collect(Collectors.toList()), indexesOfRelevantAlleles); + final List afList = remapALengthList(Arrays.stream(originalAF).boxed().collect(Collectors.toList()), indexesOfRelevantAlleles, 0.0); return Doubles.toArray(afList); } @@ -540,11 +540,11 @@ public static double[] generateAF(final double[] originalAF, final int[] indexes * @param 
indexesOfRelevantAlleles * @return */ - public static List remapRLengthList(final List originalList, final int[] indexesOfRelevantAlleles) { + public static List remapRLengthList(final List originalList, final int[] indexesOfRelevantAlleles, T filler) { Utils.nonNull(originalList); Utils.nonNull(indexesOfRelevantAlleles); - return remapList(originalList, indexesOfRelevantAlleles, 0); + return remapList(originalList, indexesOfRelevantAlleles, 0, filler); } /** @@ -553,11 +553,11 @@ public static List remapRLengthList(final List originalList, final int[] i * @param indexesOfRelevantAlleles * @return */ - public static List remapALengthList(final List originalList, final int[] indexesOfRelevantAlleles) { + public static List remapALengthList(final List originalList, final int[] indexesOfRelevantAlleles, T filler) { Utils.nonNull(originalList); Utils.nonNull(indexesOfRelevantAlleles); - return remapList(originalList, indexesOfRelevantAlleles, 1); + return remapList(originalList, indexesOfRelevantAlleles, 1, filler); } /** @@ -566,15 +566,13 @@ public static List remapALengthList(final List originalList, final int[] i * @param originalList input per-allele attributes * @param indexesOfRelevantAlleles indexes of alleles to keep, including the reference * @param offset used to indicate whether to include the ref allele values in the output or not + * @param filler default value to use if no value is mapped * @return a non-null List */ - private static List remapList(final List originalList, final int[] indexesOfRelevantAlleles, - final int offset) { + private static List remapList(final List originalList, final int[] indexesOfRelevantAlleles, + final int offset, T filler) { final int numValues = indexesOfRelevantAlleles.length - offset; //since these are log odds, this should just be alts - final List newValues = new ArrayList<>(); - - //force attributes for the non-ref to go to zero, even though that allele occasionally picks up AD counts - final int filler = 0; + final 
List newValues = new ArrayList<>(); for ( int i = offset; i < numValues + offset; i++ ) { final int oldIndex = indexesOfRelevantAlleles[i]; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 4902f41f6b8..9d6f603d855 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -249,12 +249,11 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext } /** - * Creates a comma separated string of all the filters that apply to the allele. + * Creates a list of the string names of all the filters that apply to the allele, or the string PASS if it passed all filters * @param filtersForAllele all the filters applied to the allele - * @return encoded (comma separated) list of filters that apply to the allele + * @return list of filter names that apply to the allele or PASS */ private List getDistinctFiltersForAllele(final List filtersForAllele) { - // loop through each filter and pull out the filters the specified allele final List results = filtersForAllele.stream().distinct().collect(Collectors.toList()); if (results.size() > 1 && results.contains(VCFConstants.PASSES_FILTERS_v4)) { results.remove(VCFConstants.PASSES_FILTERS_v4); From d6f90067ef110a057978bdee619b5bcfa7eca379 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 3 Feb 2020 16:43:59 -0500 Subject: [PATCH 41/85] fix another generics issue --- .../ReferenceConfidenceVariantContextMerger.java | 10 +++++----- .../tools/walkers/variantutils/ReblockGVCF.java | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java index 0dcb15ebc3a..0cdcb995856 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java @@ -659,13 +659,13 @@ private static int[] generatePL(final Genotype g, final int[] genotypeIndexMapBy * @return array of new annotation values, may be null */ @VisibleForTesting - public static Object generateAnnotationValueVector(VCFHeaderLineCount alleleCount, - final List originalList, final int[] indexesOfRelevantAlleles) { - List newLODs = null; + public static List generateAnnotationValueVector(VCFHeaderLineCount alleleCount, + final List originalList, final int[] indexesOfRelevantAlleles) { + List newLODs = null; if (alleleCount.equals(VCFHeaderLineCount.A)) { - newLODs = AlleleSubsettingUtils.remapALengthList(originalList, indexesOfRelevantAlleles); + newLODs = AlleleSubsettingUtils.remapALengthList(originalList, indexesOfRelevantAlleles, null); } else if (alleleCount.equals(VCFHeaderLineCount.R)) { - newLODs = AlleleSubsettingUtils.remapRLengthList(originalList, indexesOfRelevantAlleles); + newLODs = AlleleSubsettingUtils.remapRLengthList(originalList, indexesOfRelevantAlleles, null); } else { //count doesn't depend on alleles newLODs = originalList; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java index dee13c1d79a..6d5b3651a7f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java @@ -499,7 +499,7 @@ protected VariantContext cleanUpHighQualityVariant(final VariantContext result, if (origMap.containsKey(rawKey)) { if 
(allelesNeedSubsetting && AnnotationUtils.isAlleleSpecific(annotation)) { List alleleSpecificValues = AnnotationUtils.getAlleleLengthListOfString(originalVC.getAttributeAsString(rawKey, null)); - final List subsetList = alleleSpecificValues.size() > 0 ? AlleleSubsettingUtils.remapRLengthList(alleleSpecificValues, relevantIndices) + final List subsetList = alleleSpecificValues.size() > 0 ? AlleleSubsettingUtils.remapRLengthList(alleleSpecificValues, relevantIndices, "") : Collections.nCopies(relevantIndices.length, ""); attrMap.put(rawKey, AnnotationUtils.encodeAnyASListWithRawDelim(subsetList)); } else { From ffecce735fd7bf927a256bb363c2f7d5e15202c7 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 3 Feb 2020 17:10:37 -0500 Subject: [PATCH 42/85] make unique alt read count allele specific --- .../walkers/annotator/UniqueAltReadCount.java | 24 ++++++++++--------- .../tools/walkers/mutect/Mutect2Engine.java | 2 +- .../filtering/DuplicatedAltReadFilter.java | 3 ++- .../utils/variant/GATKVCFConstants.java | 2 +- .../utils/variant/GATKVCFHeaderLines.java | 2 +- .../mutect/UniqueAltReadCountUnitTest.java | 16 +++++++------ 6 files changed, 27 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/UniqueAltReadCount.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/UniqueAltReadCount.java index 0a214eaf9da..c7ed7efc6e1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/UniqueAltReadCount.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/UniqueAltReadCount.java @@ -7,6 +7,7 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AlleleSpecificAnnotation; import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; import 
org.broadinstitute.hellbender.utils.help.HelpConstants; import org.broadinstitute.hellbender.utils.read.GATKRead; @@ -35,8 +36,8 @@ *

    This annotation does not require or use any BAM file duplicate flags or UMI information, just the read alignments.

    */ @DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Number of non-duplicate-insert ALT reads (UNIQ_ALT_READ_COUNT)") -public class UniqueAltReadCount extends InfoFieldAnnotation { - public static final String KEY = GATKVCFConstants.UNIQUE_ALT_READ_SET_COUNT_KEY; +public class UniqueAltReadCount extends InfoFieldAnnotation implements AlleleSpecificAnnotation { + public static final String KEY = GATKVCFConstants.AS_UNIQUE_ALT_READ_SET_COUNT_KEY; @Override public List getKeyNames() { @@ -53,15 +54,16 @@ public Map annotate(final ReferenceContext ref, final VariantContext vc, final AlleleLikelihoods likelihoods) { - final Allele altAllele = vc.getAlternateAllele(0); // assume single-allelic + List uniqueCountsPerAllele = vc.getAlternateAlleles().stream().map(altAllele -> { + // Build a map from the (Start Position, Fragment Size) tuple to the count of reads with that + // start position and fragment size + Map, Long> duplicateReadMap = likelihoods.bestAllelesBreakingTies().stream() + .filter(ba -> ba.allele.equals(altAllele) && ba.isInformative()) + .map(ba -> new ImmutablePair<>(ba.evidence.getStart(), ba.evidence.getFragmentLength())) + .collect(Collectors.groupingBy(x -> x, Collectors.counting())); + return duplicateReadMap.size(); + }).collect(Collectors.toList()); - // Build a map from the (Start Position, Fragment Size) tuple to the count of reads with that - // start position and fragment size - Map, Long> duplicateReadMap = likelihoods.bestAllelesBreakingTies().stream() - .filter(ba -> ba.allele.equals(altAllele) && ba.isInformative()) - .map(ba -> new ImmutablePair<>(ba.evidence.getStart(), ba.evidence.getFragmentLength())) - .collect(Collectors.groupingBy(x -> x, Collectors.counting())); - - return ImmutableMap.of(KEY, duplicateReadMap.size()); + return ImmutableMap.of(KEY, AnnotationUtils.encodeAnyASListWithRawDelim(uniqueCountsPerAllele)); } } diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index 65549b2a911..1cefafade88 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -61,7 +61,7 @@ public final class Mutect2Engine implements AssemblyRegionEvaluator { GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY, GATKVCFConstants.IN_PON_KEY, GATKVCFConstants.POPULATION_AF_KEY, GATKVCFConstants.GERMLINE_QUAL_KEY, GATKVCFConstants.CONTAMINATION_QUAL_KEY, GATKVCFConstants.SEQUENCING_QUAL_KEY, GATKVCFConstants.POLYMERASE_SLIPPAGE_QUAL_KEY, GATKVCFConstants.READ_ORIENTATION_QUAL_KEY, - GATKVCFConstants.STRAND_QUAL_KEY, GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, GATKVCFConstants.N_COUNT_KEY, GATKVCFConstants.UNIQUE_ALT_READ_SET_COUNT_KEY); + GATKVCFConstants.STRAND_QUAL_KEY, GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, GATKVCFConstants.N_COUNT_KEY, GATKVCFConstants.AS_UNIQUE_ALT_READ_SET_COUNT_KEY); private static final String MUTECT_VERSION = "2.2"; public static final String TUMOR_SAMPLE_KEY_IN_VCF_HEADER = "tumor_sample"; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java index bfa66ba43db..6f9f92c478d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java @@ -7,6 +7,7 @@ import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; // This filter checks for the case in which PCR-duplicates with unique UMIs (which we assume is caused by false adapter priming) // amplify the erroneous signal 
for an alternate allele. @@ -22,7 +23,7 @@ public DuplicatedAltReadFilter(final int uniqueAltReadCount) { @Override public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - return Collections.singletonList(vc.getAttributeAsInt(UniqueAltReadCount.KEY, 1) <= uniqueAltReadCount); + return vc.getAttributeAsIntList(UniqueAltReadCount.KEY, 1).stream().map(count -> count <= uniqueAltReadCount).collect(Collectors.toList()); } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java index c2808b25afc..b90ad63add8 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java @@ -103,7 +103,7 @@ public final class GATKVCFConstants { public static final String READ_ORIENTATION_QUAL_KEY = "ROQ"; public static final String ORIGINAL_CONTIG_MISMATCH_KEY = "OCM"; public static final String N_COUNT_KEY = "NCount"; - public static final String UNIQUE_ALT_READ_SET_COUNT_KEY = "UNIQ_ALT_READ_COUNT"; + public static final String AS_UNIQUE_ALT_READ_SET_COUNT_KEY = "AS_UNIQ_ALT_READ_COUNT"; public static final String MEDIAN_BASE_QUALITY_KEY = "MBQ"; public static final String MEDIAN_MAPPING_QUALITY_KEY = "MMQ"; public static final String MEDIAN_FRAGMENT_LENGTH_KEY = "MFRL"; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java index 014a0b70759..af15b4d5963 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java @@ -204,7 +204,7 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf 
addInfoLine(new VCFInfoHeaderLine(NORMAL_ARTIFACT_LOG_10_ODDS_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Negative log 10 odds of artifact in normal with same allele fraction as tumor")); addInfoLine(new VCFInfoHeaderLine(ORIGINAL_CONTIG_MISMATCH_KEY, 1, VCFHeaderLineType.Integer, "Number of alt reads whose original alignment doesn't match the current contig.")); addInfoLine(new VCFInfoHeaderLine(N_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Count of N bases in the pileup")); - addInfoLine(new VCFInfoHeaderLine(UNIQUE_ALT_READ_SET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of ALT reads with unique start and mate end positions at a variant site")); + addInfoLine(new VCFInfoHeaderLine(AS_UNIQUE_ALT_READ_SET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of reads with unique start and mate end positions for each alt at a variant site")); addInfoLine(new BaseQuality().getDescriptions().get(0)); addInfoLine(new FragmentLength().getDescriptions().get(0)); addInfoLine(new MappingQuality().getDescriptions().get(0)); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/UniqueAltReadCountUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/UniqueAltReadCountUnitTest.java index ab1896ff789..ea2e5afd930 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/UniqueAltReadCountUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/UniqueAltReadCountUnitTest.java @@ -2,15 +2,18 @@ import htsjdk.samtools.SAMFileHeader; import htsjdk.variant.variantcontext.*; +import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.tools.walkers.annotator.UniqueAltReadCount; import org.broadinstitute.hellbender.utils.genotyper.*; import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; +import 
org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import org.testng.Assert; import org.testng.annotations.Test; import java.io.IOException; import java.util.*; +import java.util.stream.Collectors; public class UniqueAltReadCountUnitTest { final String sampleName = "Mark"; @@ -31,8 +34,8 @@ public void testSingleDuplicate() throws IOException { final Map annotations = uniqueAltReadCountAnnotation.annotate(null, vc, likelihoods); - final int uniqueReadSetCount = (int) annotations.get(UniqueAltReadCount.KEY); - Assert.assertEquals(uniqueReadSetCount, 1); + final List uniqueReadSetCount = AnnotationUtils.decodeAnyASListWithRawDelim((String)annotations.get(UniqueAltReadCount.KEY)).stream().map(Integer::valueOf).collect(Collectors.toList()); + Assert.assertEquals(uniqueReadSetCount.get(0).intValue(), 1); } @Test @@ -45,8 +48,8 @@ public void testMultipleDuplicateSets() throws IOException { final Map annotations1 = duplicateReadCountsAnnotation.annotate(null, vc, likelihoods1); - final int uniqueReadSetCount1 = (int) annotations1.get(UniqueAltReadCount.KEY); - Assert.assertEquals(uniqueReadSetCount1, numUniqueStarts1); + final List uniqueReadSetCount1 = AnnotationUtils.decodeAnyASListWithRawDelim((String) annotations1.get(UniqueAltReadCount.KEY)).stream().map(Integer::valueOf).collect(Collectors.toList()); + Assert.assertEquals(uniqueReadSetCount1.get(0).intValue(), numUniqueStarts1); // here ALT reads are all distinct final int numUniqueStarts2 = numAltReads; @@ -54,9 +57,8 @@ public void testMultipleDuplicateSets() throws IOException { final Map annotations2 = duplicateReadCountsAnnotation.annotate(null, vc, likelihoods2); - final int uniqueReadSetCount2 = (int) annotations2.get(UniqueAltReadCount.KEY); - - Assert.assertEquals(uniqueReadSetCount2, numUniqueStarts2); + final List uniqueReadSetCount2 = AnnotationUtils.decodeAnyASListWithRawDelim((String) 
annotations2.get(UniqueAltReadCount.KEY)).stream().map(Integer::valueOf).collect(Collectors.toList()); + Assert.assertEquals(uniqueReadSetCount2.get(0).intValue(), numUniqueStarts2); } private AlleleLikelihoods createTestLikelihoods(final Optional shiftModulus) { From 4fc762bdc4d473e1211e5f4848dfd24d15776d6b Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 4 Feb 2020 15:42:42 -0500 Subject: [PATCH 43/85] fix genotypes not included in vcf --- .../mutect/filtering/MTLowHeteroplasmyFilterTool.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java index 1947ad26f96..e86e07e462e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java @@ -55,11 +55,10 @@ public class MTLowHeteroplasmyFilterTool extends TwoPassVariantWalker { @Override public void onTraversalStart() { - final Set headerInfo = new HashSet<>(); - headerInfo.addAll(getHeaderForVariants().getMetaDataInInputOrder()); - headerInfo.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_HET_FILTER_NAME)); + final VCFHeader header = getHeaderForVariants(); + header.addMetaDataLine(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_HET_FILTER_NAME)); vcfWriter = createVCFWriter(new File(outputVcf)); - vcfWriter.writeHeader(new VCFHeader(headerInfo)); + vcfWriter.writeHeader(header); } @Override From 0deea4c4bdda3faeadc2c474eea9b3a7d5c69910 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 7 Feb 2020 15:09:52 -0500 Subject: [PATCH 44/85] better test for low het filter --- .../MTLowHeteroplasmyFilterTool.java | 7 +- .../MTLowHeteroplasmyFilterToolTest.java | 48 ++++++++----- 
.../mito/expected_LowHetNone_output.vcf | 69 ------------------- ...HetVariantWalkerIntegrationTest_output.vcf | 69 ------------------- 4 files changed, 30 insertions(+), 163 deletions(-) delete mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.vcf delete mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java index e86e07e462e..9e5fda70f22 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java @@ -4,9 +4,7 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.variantcontext.writer.VariantContextWriter; -import htsjdk.variant.vcf.VCFFilterHeaderLine; import htsjdk.variant.vcf.VCFHeader; -import htsjdk.variant.vcf.VCFHeaderLine; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; @@ -20,8 +18,6 @@ import picard.cmdline.programgroups.VariantFilteringProgramGroup; import java.io.File; -import java.util.HashSet; -import java.util.Set; @CommandLineProgramProperties( summary = "If too many low heteroplasmy sites pass other filters, then filter all low heteroplasmy sites", @@ -63,7 +59,7 @@ public void onTraversalStart() { @Override protected void firstPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { - // if the site is not 
filtered, but it is low het increment counter + // if the site is not filtered but it is low het, increment counter if (variant.isNotFiltered() && isLowHeteroplasmy(variant)) { unfilteredLowHetSites++; } @@ -91,7 +87,6 @@ public void closeTool() { } protected boolean isLowHeteroplasmy(VariantContext v) { - // does 0.0 make sense for orElse? return v.getGenotypes().stream().map(g -> lowestAF(g)).min(Double::compareTo).orElse(0.0) < lowHetThreshold; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java index a45c86a52a0..5472519dcd8 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java @@ -1,35 +1,45 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; +import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.CommandLineProgramTest; -import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; +import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; +import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import org.testng.Assert; import org.testng.annotations.Test; import java.io.File; -import java.io.IOException; -import java.util.Arrays; public class MTLowHeteroplasmyFilterToolTest extends CommandLineProgramTest { private static final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta"); private static final File NA12878_MITO_FILTERED_VCF = new File(toolsTestDir, "mutect/mito/filtered.vcf"); @Test - public void 
testLowHetVariantWalker() throws IOException { - final IntegrationTestSpec testSpec = new IntegrationTestSpec( - " -R " + MITO_REF.getAbsolutePath() + - " -V " + NA12878_MITO_FILTERED_VCF + - " -O %s", - Arrays.asList(toolsTestDir + "mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf") - ); - testSpec.executeTest("testLowHetVariantWalker", this); + public void testLowHetVariantsFiltered() { + final File outputFile = createTempFile("low-het-test", ".vcf"); + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() + .addReference(MITO_REF.getAbsolutePath()) + .addArgument(StandardArgumentDefinitions.VARIANT_SHORT_NAME, NA12878_MITO_FILTERED_VCF.getAbsolutePath()) + .addOutput(outputFile); + runCommandLine(argsBuilder); + Assert.assertTrue(VariantContextTestUtils.streamVcf(outputFile) + .map(VariantContext::getFilters).allMatch(filterSet -> filterSet.contains(GATKVCFConstants.LOW_HET_FILTER_NAME)), + "exprected all variants to have " + GATKVCFConstants.LOW_HET_FILTER_NAME + " filter."); + } - final IntegrationTestSpec testLowHetNoneSpec = new IntegrationTestSpec( - " -R " + MITO_REF.getAbsolutePath() + - " -V " + NA12878_MITO_FILTERED_VCF + - " -O %s" + - " --min-low-het-sites 5", - Arrays.asList(toolsTestDir + "mutect/mito/expected_LowHetNone_output.vcf") - ); - testLowHetNoneSpec.executeTest("testLowHetVariantWalker", this); + @Test + public void testNoLowHetVariantsFiltered() { + final File outputFile = createTempFile("no-low-het-test", ".vcf"); + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() + .addReference(MITO_REF.getAbsolutePath()) + .addArgument(StandardArgumentDefinitions.VARIANT_SHORT_NAME, NA12878_MITO_FILTERED_VCF.getAbsolutePath()) + .addArgument(MTLowHeteroplasmyFilterTool.MIN_LOW_HET_SITES_LONG_NAME, 5) + .addOutput(outputFile); + runCommandLine(argsBuilder); + Assert.assertTrue(VariantContextTestUtils.streamVcf(outputFile) + .map(VariantContext::getFilters).noneMatch(filterSet -> 
filterSet.contains(GATKVCFConstants.LOW_HET_FILTER_NAME)), + "exprected no variants to have " + GATKVCFConstants.LOW_HET_FILTER_NAME + " filter."); } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.vcf deleted file mode 100644 index 9ce1fb9283f..00000000000 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetNone_output.vcf +++ /dev/null @@ -1,69 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##GATKCommandLine= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##Mutect Version=2.1 -##contig= -##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. -##source=FilterMutectCalls -##source=Mutect2 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chrM 152 . T C . PASS AS_FilterStatus=.|PASS;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true -chrM 263 . A G . numt_chimera AS_FilterStatus=.|PASS;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 -chrM 301 . A AC . 
low_allele_frac;possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 -chrM 302 . A AC,C,ACC . PASS AS_FilterStatus=.|PASS|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 -chrM 310 . T TC . PASS AS_FilterStatus=.|PASS;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 -chrM 750 . A G . PASS AS_FilterStatus=.|PASS;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf deleted file mode 100644 index e686ba4c6a0..00000000000 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/expected_LowHetVariantWalkerIntegrationTest_output.vcf +++ /dev/null @@ -1,69 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##GATKCommandLine= 
-##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##Mutect Version=2.1 -##contig= -##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. -##source=FilterMutectCalls -##source=Mutect2 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chrM 152 . T C . mt_low_het AS_FilterStatus=.|PASS;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true -chrM 263 . A G . mt_low_het;numt_chimera AS_FilterStatus=.|PASS;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 -chrM 301 . A AC . low_allele_frac;mt_low_het;possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 -chrM 302 . A AC,C,ACC . mt_low_het AS_FilterStatus=.|PASS|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 -chrM 310 . T TC . mt_low_het AS_FilterStatus=.|PASS;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 -chrM 750 . A G . 
mt_low_het AS_FilterStatus=.|PASS;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true From 03aefbfff5251a5b00d5b09215f5a558b042785a Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 7 Feb 2020 15:27:55 -0500 Subject: [PATCH 45/85] changed getRequiredAnnotations to getRequiredInfoAnnotations to be more explicit --- .../tools/walkers/mutect/filtering/BaseQualityFilter.java | 2 +- .../mutect/filtering/ChimericOriginalAlignmentFilter.java | 2 +- .../tools/walkers/mutect/filtering/ClusteredEventsFilter.java | 2 +- .../tools/walkers/mutect/filtering/ContaminationFilter.java | 2 +- .../walkers/mutect/filtering/DuplicatedAltReadFilter.java | 2 +- .../walkers/mutect/filtering/FilteredHaplotypeFilter.java | 2 +- .../tools/walkers/mutect/filtering/FragmentLengthFilter.java | 2 +- .../tools/walkers/mutect/filtering/GermlineFilter.java | 2 +- .../tools/walkers/mutect/filtering/MappingQualityFilter.java | 2 +- .../walkers/mutect/filtering/MinAlleleFractionFilter.java | 2 +- .../tools/walkers/mutect/filtering/MultiallelicFilter.java | 2 +- .../tools/walkers/mutect/filtering/Mutect2AlleleFilter.java | 2 +- .../tools/walkers/mutect/filtering/Mutect2Filter.java | 2 +- .../walkers/mutect/filtering/Mutect2FilteringEngine.java | 2 +- .../tools/walkers/mutect/filtering/Mutect2VariantFilter.java | 4 +--- .../tools/walkers/mutect/filtering/NRatioFilter.java | 2 +- .../tools/walkers/mutect/filtering/NormalArtifactFilter.java | 2 +- .../hellbender/tools/walkers/mutect/filtering/NuMTFilter.java | 2 +- .../tools/walkers/mutect/filtering/PanelOfNormalsFilter.java | 2 +- .../walkers/mutect/filtering/PolymeraseSlippageFilter.java | 2 +- .../tools/walkers/mutect/filtering/ReadOrientationFilter.java | 2 +- .../tools/walkers/mutect/filtering/ReadPositionFilter.java | 2 +- .../tools/walkers/mutect/filtering/StrandArtifactFilter.java | 4 ++-- 
.../walkers/mutect/filtering/StrictStrandBiasFilter.java | 2 +- .../tools/walkers/mutect/filtering/TumorEvidenceFilter.java | 2 +- 25 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java index 4424b445e29..54cd50e9726 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/BaseQualityFilter.java @@ -30,5 +30,5 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.MEDIAN_BASE_QUALITY_KEY); } + protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.MEDIAN_BASE_QUALITY_KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java index b32ca32fee2..8367188d4fd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java @@ -38,5 +38,5 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY); } + protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ClusteredEventsFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ClusteredEventsFilter.java index 
1f5a57df677..b7212562bb2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ClusteredEventsFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ClusteredEventsFilter.java @@ -28,5 +28,5 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY); } + protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java index 9b1ebc6d44a..e6a5c0f5a96 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java @@ -89,5 +89,5 @@ public Optional phredScaledPosteriorAnnotationName() { } @Override - protected List requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.POPULATION_AF_KEY); } + protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.POPULATION_AF_KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java index 6f9f92c478d..3e0f6f2d8f7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/DuplicatedAltReadFilter.java @@ -32,5 +32,5 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.singletonList(UniqueAltReadCount.KEY); } + protected 
List requiredInfoAnnotations() { return Collections.singletonList(UniqueAltReadCount.KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java index 2f912799441..9a6375931ae 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FilteredHaplotypeFilter.java @@ -98,7 +98,7 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.emptyList(); } + protected List requiredInfoAnnotations() { return Collections.emptyList(); } @Override public Optional phredScaledPosteriorAnnotationName() { return Optional.empty(); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FragmentLengthFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FragmentLengthFilter.java index 3dffef00fb9..d674e0123c9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FragmentLengthFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/FragmentLengthFilter.java @@ -30,5 +30,5 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.MEDIAN_FRAGMENT_LENGTH_KEY); } + protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.MEDIAN_FRAGMENT_LENGTH_KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/GermlineFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/GermlineFilter.java index b2275929837..d27498ad89c 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/GermlineFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/GermlineFilter.java @@ -138,7 +138,7 @@ public Optional phredScaledPosteriorAnnotationName() { } @Override - protected List requiredAnnotations() { + protected List requiredInfoAnnotations() { return Arrays.asList(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY, GATKVCFConstants.POPULATION_AF_KEY); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java index 7e677c2b9be..1c00872bdfe 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MappingQualityFilter.java @@ -44,5 +44,5 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY); } + protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java index c04facde576..1dd385a6708 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java @@ -40,5 +40,5 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.emptyList(); } + protected List requiredInfoAnnotations() { return Collections.emptyList(); } } diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MultiallelicFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MultiallelicFilter.java index 4994612ee48..ac173be8e56 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MultiallelicFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MultiallelicFilter.java @@ -32,5 +32,5 @@ public String filterName() { return GATKVCFConstants.MULTIALLELIC_FILTER_NAME; } - protected List requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY); } + protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index c0e0d431988..43b2fb78c20 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -49,7 +49,7 @@ private static LinkedHashMap> combineDataByAllele(final Link */ @Override public List errorProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - return requiredAnnotations().stream().allMatch(vc::hasAttribute) ? + return requiredInfoAnnotations().stream().allMatch(vc::hasAttribute) ? 
calculateErrorProbabilityForAlleles(vc, filteringEngine, referenceContext) .stream().map(prob -> Mutect2FilteringEngine.roundFinitePrecisionErrors(prob)).collect(Collectors.toList()) : Collections.emptyList(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java index fb869e884c5..b12e3a010d1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java @@ -21,7 +21,7 @@ protected void learnParametersAndClearAccumulatedData() { public abstract ErrorType errorType(); public abstract String filterName(); public abstract Optional phredScaledPosteriorAnnotationName(); - protected abstract List requiredAnnotations(); + protected abstract List requiredInfoAnnotations(); /** * diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 9d6f603d855..745bd56b243 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -216,7 +216,7 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext errorProbabilities.getProbabilitiesForVariantFilters().entrySet().stream() .forEach(entry -> { entry.getKey().phredScaledPosteriorAnnotationName().ifPresent(annotation -> { - if (entry.getKey().requiredAnnotations().stream().allMatch(vc::hasAttribute)) { + if (entry.getKey().requiredInfoAnnotations().stream().allMatch(vc::hasAttribute)) { vcb.attribute(annotation, QualityUtils.errorProbToQual(entry.getValue())); } }); diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java index fc2a4dec5e6..985df7f4cee 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java @@ -2,9 +2,7 @@ import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.utils.IndexRange; -import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -14,7 +12,7 @@ public Mutect2VariantFilter() { } @Override public List errorProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { int numAltAlleles = vc.getNAlleles() - 1; - final double result = Mutect2FilteringEngine.roundFinitePrecisionErrors(requiredAnnotations().stream().allMatch(vc::hasAttribute) ? + final double result = Mutect2FilteringEngine.roundFinitePrecisionErrors(requiredInfoAnnotations().stream().allMatch(vc::hasAttribute) ? 
calculateErrorProbability(vc, filteringEngine, referenceContext) : 0.0); return Collections.nCopies(numAltAlleles, result); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NRatioFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NRatioFilter.java index 8c2dc738a23..b01d63a68d5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NRatioFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NRatioFilter.java @@ -38,5 +38,5 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.N_COUNT_KEY); } + protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.N_COUNT_KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NormalArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NormalArtifactFilter.java index 9ed0d7cad8d..560ec648657 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NormalArtifactFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NormalArtifactFilter.java @@ -69,7 +69,7 @@ public String filterName() { } @Override - protected List requiredAnnotations() { + protected List requiredInfoAnnotations() { return Arrays.asList(GATKVCFConstants.NORMAL_ARTIFACT_LOG_10_ODDS_KEY, GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java index a7f70ea66de..171a1de2d5e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java @@ -45,6 +45,6 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.emptyList(); } + protected List requiredInfoAnnotations() { return Collections.emptyList(); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PanelOfNormalsFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PanelOfNormalsFilter.java index e4038d01b18..c3386fafefc 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PanelOfNormalsFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PanelOfNormalsFilter.java @@ -21,5 +21,5 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.emptyList(); } + protected List requiredInfoAnnotations() { return Collections.emptyList(); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymeraseSlippageFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymeraseSlippageFilter.java index 8a4e149d866..689dcc953a1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymeraseSlippageFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/PolymeraseSlippageFilter.java @@ -75,7 +75,7 @@ public String filterName() { } @Override - protected List requiredAnnotations() { + protected List requiredInfoAnnotations() { return Arrays.asList(GATKVCFConstants.REPEATS_PER_ALLELE_KEY, GATKVCFConstants.REPEAT_UNIT_KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadOrientationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadOrientationFilter.java index dc9820a626b..d6f3caf249d 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadOrientationFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadOrientationFilter.java @@ -68,7 +68,7 @@ public Optional phredScaledPosteriorAnnotationName() { } @Override - protected List requiredAnnotations() { return Collections.emptyList(); } + protected List requiredInfoAnnotations() { return Collections.emptyList(); } @VisibleForTesting diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java index 0cca5690f1d..8623a56c2c9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ReadPositionFilter.java @@ -33,5 +33,5 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.MEDIAN_READ_POSITON_KEY); } + protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.MEDIAN_READ_POSITON_KEY); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java index de6a834528a..b95f26e888b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java @@ -91,7 +91,7 @@ public List calculateArtifactProbabilities(final VariantContext vc, final @Override protected void accumulateDataForLearning(final VariantContext vc, final ErrorProbabilities errorProbabilities, final Mutect2FilteringEngine filteringEngine) { - if 
(requiredAnnotations().stream().allMatch(vc::hasAttribute)) { + if (requiredInfoAnnotations().stream().allMatch(vc::hasAttribute)) { final List altESteps = calculateArtifactProbabilities(vc, filteringEngine); eSteps.addAll(altESteps); } @@ -166,7 +166,7 @@ public String filterName() { } @Override - protected List requiredAnnotations() { + protected List requiredInfoAnnotations() { return Collections.emptyList(); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java index 653e3ca1fed..bb498301335 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java @@ -36,5 +36,5 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.emptyList(); } + protected List requiredInfoAnnotations() { return Collections.emptyList(); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java index 7ab922e2a7e..09d0b1c1d35 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/TumorEvidenceFilter.java @@ -40,6 +40,6 @@ public String filterName() { } @Override - protected List requiredAnnotations() { return Collections.singletonList(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY); } + protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY); } } From 851a90c3a7a4ce769feebee42adf9197c38958cc Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Wed, 12 Feb 2020 09:48:58 
-0500 Subject: [PATCH 46/85] fix low het filter to ignore ref AF --- .../mutect/filtering/MTLowHeteroplasmyFilterTool.java | 9 +++++++-- .../filtering/MTLowHeteroplasmyFilterToolTest.java | 11 +++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java index 9e5fda70f22..e933b9f00bc 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java @@ -18,6 +18,11 @@ import picard.cmdline.programgroups.VariantFilteringProgramGroup; import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; @CommandLineProgramProperties( summary = "If too many low heteroplasmy sites pass other filters, then filter all low heteroplasmy sites", @@ -91,7 +96,7 @@ protected boolean isLowHeteroplasmy(VariantContext v) { } protected double lowestAF(Genotype g) { - int[] depths = g.getAD(); - return MathUtils.arrayMin(depths)/ MathUtils.sum(depths); + List depths = Arrays.stream(g.getAD()).boxed().collect(Collectors.toList()); + return Collections.min(depths.subList(1, depths.size())) / (double) depths.stream().mapToInt(Integer::intValue).sum(); } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java index 5472519dcd8..08401ebcbe9 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java @@ -10,6 +10,8 @@ import org.testng.annotations.Test; import java.io.File; +import java.util.*; +import java.util.stream.Collectors; public class MTLowHeteroplasmyFilterToolTest extends CommandLineProgramTest { private static final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta"); @@ -17,15 +19,17 @@ public class MTLowHeteroplasmyFilterToolTest extends CommandLineProgramTest { @Test public void testLowHetVariantsFiltered() { + final Set low_het_sites = new HashSet<>(Arrays.asList(301, 302)); final File outputFile = createTempFile("low-het-test", ".vcf"); final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() .addReference(MITO_REF.getAbsolutePath()) .addArgument(StandardArgumentDefinitions.VARIANT_SHORT_NAME, NA12878_MITO_FILTERED_VCF.getAbsolutePath()) + .addArgument(MTLowHeteroplasmyFilterTool.MIN_LOW_HET_SITES_LONG_NAME, 0) .addOutput(outputFile); runCommandLine(argsBuilder); - Assert.assertTrue(VariantContextTestUtils.streamVcf(outputFile) - .map(VariantContext::getFilters).allMatch(filterSet -> filterSet.contains(GATKVCFConstants.LOW_HET_FILTER_NAME)), - "exprected all variants to have " + GATKVCFConstants.LOW_HET_FILTER_NAME + " filter."); + Set variants = VariantContextTestUtils.streamVcf(outputFile) + .filter(vcf -> vcf.getFilters().contains(GATKVCFConstants.LOW_HET_FILTER_NAME)).collect(Collectors.toSet()); + Assert.assertEquals(variants.stream().map(var -> var.getStart()).collect(Collectors.toList()), low_het_sites, "exprected these sites to have " + GATKVCFConstants.LOW_HET_FILTER_NAME + " filter."); } @Test @@ -34,7 +38,6 @@ public void testNoLowHetVariantsFiltered() { final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() .addReference(MITO_REF.getAbsolutePath()) .addArgument(StandardArgumentDefinitions.VARIANT_SHORT_NAME, NA12878_MITO_FILTERED_VCF.getAbsolutePath()) - 
.addArgument(MTLowHeteroplasmyFilterTool.MIN_LOW_HET_SITES_LONG_NAME, 5) .addOutput(outputFile); runCommandLine(argsBuilder); Assert.assertTrue(VariantContextTestUtils.streamVcf(outputFile) From 9e59a3fe972d01c837c9788c95ad3b0c8320eed9 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Wed, 12 Feb 2020 17:54:50 -0500 Subject: [PATCH 47/85] remove . in AS_FilterStatus for ref, and change PASS to . --- .../mutect/filtering/ErrorProbabilities.java | 20 ++-------------- .../filtering/Mutect2FilteringEngine.java | 21 ++++++++-------- .../variant/GATKVariantContextUtils.java | 22 +++++++++++++++++ .../mutect/Mutect2IntegrationTest.java | 24 +++++++++---------- 4 files changed, 47 insertions(+), 40 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index 1ee0521f664..2af15875438 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -5,6 +5,7 @@ import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import java.util.*; import java.util.function.Function; @@ -32,14 +33,7 @@ public ErrorProbabilities(final List filters, final VariantContex .collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (a, b) -> a, LinkedHashMap::new)); // if vc has symbolic alleles, remove them from each filter list - if (vc.hasSymbolicAlleles()) { - List symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList()); - // convert allele index to alt allele index - List symAltIndexes = 
vc.getAlleleIndices(symbolicAlleles).stream().map(i -> i-1).collect(Collectors.toList()); - - alleleProbabilitiesByFilter.replaceAll((k, v) -> removeItemsByIndex(v, symAltIndexes)); - } - + alleleProbabilitiesByFilter.replaceAll((k, v) -> GATKVariantContextUtils.removeDataForSymbolicAltAlleles(vc, v)); LinkedHashMap>> probabilitiesByAllelesForEachFilter = alleleProbabilitiesByFilter.entrySet().stream().collect( groupingBy(entry -> entry.getKey().errorType(), LinkedHashMap::new, mapping(entry -> entry.getValue(), toList()))); // convert the data so we have a list of probabilities by allele instead of filter @@ -61,16 +55,6 @@ public ErrorProbabilities(final List filters, final VariantContex combinedErrorProbabilitiesByAllele.replaceAll(trueProb -> Mutect2FilteringEngine.roundFinitePrecisionErrors(1.0 - trueProb)); } - private List removeItemsByIndex(List probs, List indexesToRemove) { - List updated = new ArrayList<>(); - new IndexRange(0, probs.size()).forEach(i -> { - if (!indexesToRemove.contains(i)) { - updated.add(probs.get(i)); - } - }); - return updated; - } - public List getCombinedErrorProbabilities() { return combinedErrorProbabilitiesByAllele; } public List getTechnicalArtifactProbabilities() { return probabilitiesByTypeAndAllele.get(ErrorType.ARTIFACT); } public List getNonSomaticProbabilities() { return probabilitiesByTypeAndAllele.get(ErrorType.NON_SOMATIC); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 745bd56b243..a3413c9e532 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -18,6 +18,7 @@ import org.broadinstitute.hellbender.utils.*; import org.broadinstitute.hellbender.utils.io.IOUtils; import 
org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; import java.io.File; @@ -191,12 +192,11 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext .map(entry -> addFilterStrings(entry.getValue(), errorThreshold, entry.getKey().filterName())).collect(Collectors.toList()); // for each allele, merge all allele specific filters -// List> ASFiltersIterator = ASFilters.stream().map(list -> list.listIterator()).collect(Collectors.toList()); List> filtersByAllele = ErrorProbabilities.transpose(alleleStatusByFilter); List> distinctFiltersByAllele = filtersByAllele.stream().map(this::getDistinctFiltersForAllele).collect(Collectors.toList()); ListIterator mergedFilterStringByAllele = distinctFiltersByAllele.stream().map(AnnotationUtils::encodeStringList).collect(Collectors.toList()).listIterator(); - List orderedASFilterStrings = vc.getAlleles().stream().map(allele -> allele.isReference() || allele.isSymbolic() ? + List orderedASFilterStrings = vc.getAlternateAlleles().stream().map(allele -> allele.isSymbolic() ? VCFConstants.EMPTY_INFO_FIELD : mergedFilterStringByAllele.next()).collect(Collectors.toList()); String finalAttrString = AnnotationUtils.encodeAnyASListWithRawDelim(orderedASFilterStrings); @@ -229,8 +229,10 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext // if all alleles have been filtered out, but for different reasons, fail the site. 
// if the site is only ref and symbolic, no filters will be applied so don't fail if (siteFiltersWithErrorProb.isEmpty() && !distinctFiltersByAllele.stream().allMatch(List::isEmpty)) { + List> filtersNonSymbolicAlleles = GATKVariantContextUtils.removeDataForSymbolicAltAlleles(vc, distinctFiltersByAllele); // if any allele passed, don't fail the site - if (!distinctFiltersByAllele.stream().flatMap(List::stream).anyMatch(f -> f.equals(VCFConstants.PASSES_FILTERS_v4))) { + if (!filtersNonSymbolicAlleles.stream().anyMatch(filterList -> filterList.contains(VCFConstants.EMPTY_INFO_FIELD))) { +// if (!distinctFiltersByAllele.stream().flatMap(List::stream).anyMatch(f -> f.equals(VCFConstants.PASSES_FILTERS_v4))) { // we know the allele level filters exceeded their threshold - so set this prob to 1 siteFiltersWithErrorProb.put(GATKVCFConstants.FAIL, 1.0); } @@ -249,23 +251,22 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext } /** - * Creates a list of the string names of all the filters that apply to the allele, or the string PASS if it passed all filters + * Creates a list of the string names of all the filters that apply to the allele, or the string . if it passed all filters * @param filtersForAllele all the filters applied to the allele - * @return list of filter names that apply to the allele or PASS + * @return list of filter names that apply to the allele or . 
*/ private List getDistinctFiltersForAllele(final List filtersForAllele) { final List results = filtersForAllele.stream().distinct().collect(Collectors.toList()); - if (results.size() > 1 && results.contains(VCFConstants.PASSES_FILTERS_v4)) { - results.remove(VCFConstants.PASSES_FILTERS_v4); - } else if (results.isEmpty()) { - results.add(VCFConstants.PASSES_FILTERS_v4); + results.remove(VCFConstants.PASSES_FILTERS_v4); + if (results.isEmpty()) { + results.add(VCFConstants.EMPTY_INFO_FIELD); } return results; } /** * For each allele, determine whether the filter should be applied and return either the - * filter name or PASS + * filter name or PASS. We use PASS as a place holder because the results are per alt allele. * @param probabilities the probabilities computed by the filter for the alleles * @param errorThreshold the theshold to use to determine whether filter applies * @param filterName the name of the filter being evaluated diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index 4fbe6e2cb16..1c30d7ab77d 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -1983,5 +1983,27 @@ public static boolean isUnmixedMnpIgnoringNonRef(final VariantContext vc) { return true; } + public static List removeDataForSymbolicAltAlleles(VariantContext vc, List data) { + if (vc.hasSymbolicAlleles()) { + List symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList()); + // convert allele index to alt allele index + List symAltIndexes = vc.getAlleleIndices(symbolicAlleles).stream().map(i -> i-1).collect(Collectors.toList()); + return removeItemsByIndex(data, symAltIndexes); + } else { + return data; + } + } + + public static List 
removeItemsByIndex(List data, List indexesToRemove) { + List updated = new ArrayList<>(); + new IndexRange(0, data.size()).forEach(i -> { + if (!indexesToRemove.contains(i)) { + updated.add(data.get(i)); + } + }); + return updated; + } + + } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 2c6a4da3f70..b1f5e1ff227 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -522,13 +522,13 @@ public Object[][] vcfsForFiltering() { Collections.emptySet(), ImmutableSet.of(GATKVCFConstants.FAIL)), Arrays.asList( - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // .|strand_bias, strict_stand - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), // .|numt_chimera - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, low_allele_frac, possible_numt - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|PASS|weak_evidence, strand_bias, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt - 
Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4), // .|PASS - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME) // .|weak_evidence, strand_bias, strict_stand|low_allele_frac, possible_numt + Arrays.asList(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // strand_bias, strict_stand + Arrays.asList(GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), // numt_chimera + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // weak_evidence, low_allele_frac, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, strand_bias, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), // . + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), // . 
+ Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME) // weak_evidence, strand_bias, strict_stand|low_allele_frac, possible_numt )}, {NA12878_MITO_GVCF, .0009, 0.5, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList( @@ -540,11 +540,11 @@ public Object[][] vcfsForFiltering() { GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME)), Arrays.asList( - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.EMPTY_INFO_FIELD), //".|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // ".|weak_evidence, possible_numt|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.PASSES_FILTERS_v4, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|PASS|weak_evidence, base_qual, low_allele_frac|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + 
GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // ".|weak_evidence, base_qual, map_qual, contamination, position, low_allele_frac, possible_numt|." + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), //".", + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //"weak_evidence, base_qual|.", + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // "weak_evidence, possible_numt|.", + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual, low_allele_frac|.", + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // "weak_evidence, base_qual, map_qual, contamination, position, low_allele_frac, possible_numt|." 
)} }; } From 5459d4f8d2236245e627ddd4bd897c40254467be Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 14 Feb 2020 14:11:16 -0500 Subject: [PATCH 48/85] fix bug in removing symbolic data --- .../filtering/Mutect2FilteringEngine.java | 1 - .../filtering/StrandArtifactFilter.java | 22 ++++++++++--------- .../filtering/StrictStrandBiasFilter.java | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index a3413c9e532..3c2afb167a8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -232,7 +232,6 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext List> filtersNonSymbolicAlleles = GATKVariantContextUtils.removeDataForSymbolicAltAlleles(vc, distinctFiltersByAllele); // if any allele passed, don't fail the site if (!filtersNonSymbolicAlleles.stream().anyMatch(filterList -> filterList.contains(VCFConstants.EMPTY_INFO_FIELD))) { -// if (!distinctFiltersByAllele.stream().flatMap(List::stream).anyMatch(f -> f.equals(VCFConstants.PASSES_FILTERS_v4))) { // we know the allele level filters exceeded their threshold - so set this prob to 1 siteFiltersWithErrorProb.put(GATKVCFConstants.FAIL, 1.0); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java index b95f26e888b..c6bb942ca86 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java @@ -11,6 +11,7 @@ import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.OptimizationUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import java.util.*; import java.util.function.DoubleUnaryOperator; @@ -61,15 +62,16 @@ public List calculateArtifactProbabilities(final VariantContext vc, final } // remove symbolic alleles if (vc.hasSymbolicAlleles()) { - final List> unfilteredSbs = new ArrayList<>(sbs); - sbs.clear(); - List symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList()); - List symIndexes = vc.getAlleleIndices(symbolicAlleles); - new IndexRange(0, sbs.size()).forEach(i -> { - if (!symIndexes.contains(i)) { - sbs.add(unfilteredSbs.get(i)); - } - }); +// final List> unfilteredSbs = new ArrayList<>(sbs); +// sbs.clear(); + sbs = GATKVariantContextUtils.removeDataForSymbolicAltAlleles(vc, sbs); +// List symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList()); +// List symIndexes = vc.getAlleleIndices(symbolicAlleles); +// new IndexRange(0, unfilteredSbs.size()).forEach(i -> { +// if (!symIndexes.contains(i)) { +// sbs.add(unfilteredSbs.get(i)); +// } +// }); } final List indelSizes = vc.getAlternateAlleles().stream().map(alt -> Math.abs(vc.getReference().length() - alt.length())).collect(Collectors.toList()); @@ -167,7 +169,7 @@ public String filterName() { @Override protected List requiredInfoAnnotations() { - return Collections.emptyList(); + return Collections.singletonList(GATKVCFConstants.AS_SB_TABLE_KEY); } private double artifactStrandLogLikelihood(final int strandCount, final int strandAltCount) { diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java index bb498301335..2aeecb068f3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java @@ -36,5 +36,5 @@ public String filterName() { } @Override - protected List requiredInfoAnnotations() { return Collections.emptyList(); } + protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.AS_SB_TABLE_KEY); } } From 30b59dcc25b202e6ab4b1a352d0994ac9058ac36 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 18 Feb 2020 14:39:14 -0500 Subject: [PATCH 49/85] add test for uniq alt read count --- .../walkers/mutect/filtering/StrandArtifactFilter.java | 9 --------- .../hellbender/utils/variant/GATKVCFHeaderLines.java | 4 ++-- .../tools/walkers/mutect/Mutect2IntegrationTest.java | 5 +++-- .../tools/mutect/mito/unfiltered-with-assb.vcf | 5 +++-- 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java index c6bb942ca86..df1653ae9e6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java @@ -62,16 +62,7 @@ public List calculateArtifactProbabilities(final VariantContext vc, final } // remove symbolic alleles if (vc.hasSymbolicAlleles()) { -// final List> unfilteredSbs = new ArrayList<>(sbs); -// sbs.clear(); sbs = GATKVariantContextUtils.removeDataForSymbolicAltAlleles(vc, sbs); -// List 
symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList()); -// List symIndexes = vc.getAlleleIndices(symbolicAlleles); -// new IndexRange(0, unfilteredSbs.size()).forEach(i -> { -// if (!symIndexes.contains(i)) { -// sbs.add(unfilteredSbs.get(i)); -// } -// }); } final List indelSizes = vc.getAlternateAlleles().stream().map(alt -> Math.abs(vc.getReference().length() - alt.length())).collect(Collectors.toList()); diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java index af15b4d5963..004d5bf7eb7 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java @@ -130,7 +130,7 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf addInfoLine(new VCFInfoHeaderLine(CLIPPING_RANK_SUM_KEY, 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. 
Ref number of hard clipped bases")); addInfoLine(new VCFInfoHeaderLine(FISHER_STRAND_KEY, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias")); addInfoLine(new VCFInfoHeaderLine(AS_FISHER_STRAND_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "allele specific phred-scaled p-value using Fisher's exact test to detect strand bias of each alt allele")); - addInfoLine(new VCFInfoHeaderLine(AS_SB_TABLE_KEY, 1, VCFHeaderLineType.String, "Allele-specific forward/reverse read counts for strand bias tests")); + addInfoLine(new VCFInfoHeaderLine(AS_SB_TABLE_KEY, VCFHeaderLineCount.R, VCFHeaderLineType.String, "Allele-specific forward/reverse read counts for strand bias tests")); addInfoLine(new VCFInfoHeaderLine(NOCALL_CHROM_KEY, 1, VCFHeaderLineType.Integer, "Number of no-called samples")); addInfoLine(new VCFInfoHeaderLine(GQ_MEAN_KEY, 1, VCFHeaderLineType.Float, "Mean of all GQ values")); addInfoLine(new VCFInfoHeaderLine(GQ_STDEV_KEY, 1, VCFHeaderLineType.Float, "Standard deviation of all GQ values")); @@ -204,7 +204,7 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf addInfoLine(new VCFInfoHeaderLine(NORMAL_ARTIFACT_LOG_10_ODDS_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Negative log 10 odds of artifact in normal with same allele fraction as tumor")); addInfoLine(new VCFInfoHeaderLine(ORIGINAL_CONTIG_MISMATCH_KEY, 1, VCFHeaderLineType.Integer, "Number of alt reads whose original alignment doesn't match the current contig.")); addInfoLine(new VCFInfoHeaderLine(N_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Count of N bases in the pileup")); - addInfoLine(new VCFInfoHeaderLine(AS_UNIQUE_ALT_READ_SET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of reads with unique start and mate end positions for each alt at a variant site")); + addInfoLine(new VCFInfoHeaderLine(AS_UNIQUE_ALT_READ_SET_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Number of reads with unique start 
and mate end positions for each alt at a variant site")); addInfoLine(new BaseQuality().getDescriptions().get(0)); addInfoLine(new FragmentLength().getDescriptions().get(0)); addInfoLine(new MappingQuality().getDescriptions().get(0)); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index b1f5e1ff227..3eab9a9d4f2 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -519,15 +519,15 @@ public Object[][] vcfsForFiltering() { GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME), Collections.emptySet(), Collections.emptySet(), - Collections.emptySet(), + ImmutableSet.of(GATKVCFConstants.DUPLICATED_EVIDENCE_FILTER_NAME), ImmutableSet.of(GATKVCFConstants.FAIL)), Arrays.asList( Arrays.asList(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // strand_bias, strict_stand Arrays.asList(GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), // numt_chimera Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // weak_evidence, low_allele_frac, possible_numt Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, strand_bias, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, 
possible_numt - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), // . Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), // . + Arrays.asList(GATKVCFConstants.DUPLICATED_EVIDENCE_FILTER_NAME), // duplicate Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME) // weak_evidence, strand_bias, strict_stand|low_allele_frac, possible_numt )}, @@ -561,6 +561,7 @@ public void testFilterMitochondria(File unfiltered, final double minAlleleFracti args -> args.add(M2FiltersArgumentCollection.MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, autosomalCoverage), args -> args.add(M2FiltersArgumentCollection.MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME, 4.0), args -> args.add(M2FiltersArgumentCollection.MIN_READS_ON_EACH_STRAND_LONG_NAME, 1), + args -> args.add(M2FiltersArgumentCollection.UNIQUE_ALT_READ_COUNT_LONG_NAME, 2), args -> { intervals.stream().map(SimpleInterval::new).forEach(args::addInterval); return args; diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf index 4afa7b91d99..a57b8f7c4ca 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered-with-assb.vcf @@ -31,7 +31,8 @@ ##INFO= ##INFO= ##INFO= -##INFO= +##INFO= +##INFO= ##Mutect Version=2.1 ##contig= ##filtering_status=Warning: unfiltered Mutect2 calls. Please run FilterMutectCalls to remove false positives. @@ -42,5 +43,5 @@ chrM 263 . A G . . AS_SB_TABLE=1,0|431,400;DP=858;ECNT=4;TLOD=2641.72;POPAF=5.00 chrM 301 . A AC . . 
AS_SB_TABLE=500,149|41,12;DP=680;ECNT=4;TLOD=3.32;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 chrM 302 . A AC,C,ACC . . AS_SB_TABLE=0,5|51,350|60,7|49,0;DP=659;ECNT=4;TLOD=891.23,10.66,67.66;POPAF=5.000e-08,5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 chrM 310 . T TC . . AS_SB_TABLE=0,0|300,358;DP=705;ECNT=4;TLOD=1974.89;POPAF=5.000e-08;RPA=5,6;RU=C;STR;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 -chrM 750 . A G . . AS_SB_TABLE=0,1|100,1424;DP=1568;ECNT=1;TLOD=5097.90;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true +chrM 750 . A G . . AS_UNIQ_ALT_READ_COUNT=2;AS_SB_TABLE=0,1|100,1424;DP=1568;ECNT=1;TLOD=5097.90;POPAF=5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true chrM 802 . A C,ACC . . 
AS_SB_TABLE=55,50|0,301|30,37;DP=659;ECNT=4;TLOD=10.66,891.23;POPAF=5.000e-08,5.000e-08;OCM=0 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2:105,301,67:0.636,0.141:2,163,35:3,238,32:20,20,30:419,316,340:60,60,60:41,33 From e702456f0b219c2ab8c551172177cc91813907f7 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 18 Feb 2020 16:19:35 -0500 Subject: [PATCH 50/85] add final tests --- .../mutect/filtering/ErrorProbabilities.java | 2 -- .../filtering/Mutect2FilteringEngine.java | 6 +++--- .../mutect/filtering/StrandArtifactFilter.java | 4 +--- .../filtering/StrictStrandBiasFilter.java | 6 +++++- .../utils/variant/GATKVariantContextUtils.java | 13 +++++++++++-- .../walkers/mutect/Mutect2IntegrationTest.java | 14 +++++++------- .../tools/mitochondria/NA12878.MT.g.vcf | 9 +++++---- .../tools/mitochondria/NA12878.MT.g.vcf.idx | Bin 199 -> 198 bytes 8 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index 2af15875438..e06526a21be 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -1,9 +1,7 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; -import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 3c2afb167a8..62508ce7f8f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -292,10 +292,10 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { filters.add(new TumorEvidenceFilter()); filters.add(new BaseQualityFilter(MTFAC.minMedianBaseQuality)); filters.add(new MappingQualityFilter(MTFAC.minMedianMappingQuality, MTFAC.longIndelLength)); - filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); // test - filters.add(new StrandArtifactFilter()); // test gvcf + filters.add(new DuplicatedAltReadFilter(MTFAC.uniqueAltReadCount)); + filters.add(new StrandArtifactFilter()); filters.add(new ContaminationFilter(MTFAC.contaminationTables, MTFAC.contaminationEstimate)); - filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); // test gvcf + filters.add(new StrictStrandBiasFilter(MTFAC.minReadsOnEachStrand)); filters.add(new ReadPositionFilter(MTFAC.minMedianReadPosition)); filters.add(new MinAlleleFractionFilter(MTFAC.minAf)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java index df1653ae9e6..8062ba5681f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrandArtifactFilter.java @@ -1,13 +1,11 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; import com.google.common.annotations.VisibleForTesting; -import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import 
org.apache.commons.math3.util.CombinatoricsUtils; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils; import org.broadinstitute.hellbender.tools.walkers.validation.basicshortmutpileup.BetaBinomialDistribution; -import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.OptimizationUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; @@ -62,7 +60,7 @@ public List calculateArtifactProbabilities(final VariantContext vc, final } // remove symbolic alleles if (vc.hasSymbolicAlleles()) { - sbs = GATKVariantContextUtils.removeDataForSymbolicAltAlleles(vc, sbs); + sbs = GATKVariantContextUtils.removeDataForSymbolicAlleles(vc, sbs); } final List indelSizes = vc.getAlternateAlleles().stream().map(alt -> Math.abs(vc.getReference().length() - alt.length())).collect(Collectors.toList()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java index 2aeecb068f3..112e26c1a7d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/StrictStrandBiasFilter.java @@ -4,7 +4,7 @@ import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; -import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; +import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import java.util.Collections; import java.util.List; @@ -26,6 +26,10 @@ public List areAllelesArtifacts(final VariantContext 
vc, final Mutect2F if (minReadsOnEachStrand == 0 || sbs == null || sbs.isEmpty() || sbs.size() <= 1) { return Collections.emptyList(); } + // remove symbolic alleles + if (vc.hasSymbolicAlleles()) { + sbs = GATKVariantContextUtils.removeDataForSymbolicAlleles(vc, sbs); + } // skip the reference return sbs.subList(1, sbs.size()).stream().map(altList -> altList.stream().anyMatch(x -> x == 0)).collect(Collectors.toList()); } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index 1c30d7ab77d..2cd41ccc726 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -1984,10 +1984,19 @@ public static boolean isUnmixedMnpIgnoringNonRef(final VariantContext vc) { } public static List removeDataForSymbolicAltAlleles(VariantContext vc, List data) { + return removeDataForSymbolicAlleles(vc, data, false); + } + + public static List removeDataForSymbolicAlleles(VariantContext vc, List data) { + return removeDataForSymbolicAlleles(vc, data, true); + } + + protected static List removeDataForSymbolicAlleles(VariantContext vc, List data, boolean dataContainsReference) { if (vc.hasSymbolicAlleles()) { List symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList()); - // convert allele index to alt allele index - List symAltIndexes = vc.getAlleleIndices(symbolicAlleles).stream().map(i -> i-1).collect(Collectors.toList()); + // convert allele index to index for data + int offset = dataContainsReference ? 
0 : 1; + List symAltIndexes = vc.getAlleleIndices(symbolicAlleles).stream().map(i -> i-offset).collect(Collectors.toList()); return removeItemsByIndex(data, symAltIndexes); } else { return data; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 3eab9a9d4f2..92010f59af6 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -533,18 +533,18 @@ public Object[][] vcfsForFiltering() { )}, {NA12878_MITO_GVCF, .0009, 0.5, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList( Collections.emptySet(), - ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME), - ImmutableSet.of(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME), + ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME), + ImmutableSet.of(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME,GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), Collections.emptySet(), ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.CONTAMINATION_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, - GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME)), + GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME)), Arrays.asList( 
Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), //".", - Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //"weak_evidence, base_qual|.", - Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // "weak_evidence, possible_numt|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual, low_allele_frac|.", - Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // "weak_evidence, base_qual, map_qual, contamination, position, low_allele_frac, possible_numt|." 
+ Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //"weak_evidence, base_qual, strand_bias|.", + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // "weak_evidence, strict_strand, strand_bias, possible_numt|.", + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual, strand_bias, low_allele_frac|.", + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // "weak_evidence, base_qual, map_qual, contamination, strand_artifact, position, low_allele_frac, possible_numt|." 
)} }; } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.MT.g.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.MT.g.vcf index 3e5050b7614..d61be83cc9f 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.MT.g.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.MT.g.vcf @@ -25,6 +25,7 @@ ##GVCFBlock0-5=minGQ=0(inclusive),maxGQ=5(exclusive) ##GVCFBlock10-2147483647=minGQ=10(inclusive),maxGQ=2147483647(exclusive) ##GVCFBlock5-10=minGQ=5(inclusive),maxGQ=10(exclusive) +##INFO= ##INFO= ##INFO= ##INFO= @@ -155,9 +156,9 @@ MT 31 . C . . END=31 GT:DP:MIN_DP:TLOD 0/0:1338:1338:-3.107e+00 MT 32 . A . . END=34 GT:DP:MIN_DP:TLOD 0/0:1435:1360:-2.827e+00 MT 35 . G . . END=35 GT:DP:MIN_DP:TLOD 0/0:1576:1576:-1.333e+00 MT 36 . G . . END=36 GT:DP:MIN_DP:TLOD 0/0:1614:1614:-2.286e+00 -MT 37 . A G, . . DP=1594;ECNT=4;MBQ=27,6,0;MFRL=575,15999,0;MMQ=60,60,0;MPOS=7,0;POPAF=7.30,7.30;SAAF=0.010,0.010,7.242e-03;SAPP=3.734e-04,4.515e-03,0.995;TLOD=-2.556e+00,-2.886e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2:1508,11,0:1.092e-03,6.494e-04:1519:674,7,0:834,4,0 +MT 37 . A G, . . AS_SB_TABLE=1000,508|8,3|0,0;DP=1594;ECNT=4;MBQ=27,6,0;MFRL=575,15999,0;MMQ=60,60,0;MPOS=7,0;POPAF=7.30,7.30;SAAF=0.010,0.010,7.242e-03;SAPP=3.734e-04,4.515e-03,0.995;TLOD=-2.556e+00,-2.886e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2:1508,11,0:1.092e-03,6.494e-04:1519:674,7,0:834,4,0 MT 38 . G . . END=39 GT:DP:MIN_DP:TLOD 0/0:1554:1534:-3.194e+00 -MT 40 . T G, . . DP=1709;ECNT=4;MBQ=28,21,0;MFRL=562,16148,0;MMQ=60,60,0;MPOS=27,0;POPAF=7.30,7.30;SAAF=0.010,0.00,5.851e-04;SAPP=1.826e-04,5.519e-04,0.999;TLOD=-1.650e+00,-2.932e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:1708,1,0:1.148e-03,5.841e-04:1709:766,1,0:942,0,0:0|1:40_T_G:40 +MT 40 . T G, . . 
AS_SB_TABLE=1000,708|1,0|0,0;DP=1709;ECNT=4;MBQ=28,21,0;MFRL=562,16148,0;MMQ=60,60,0;MPOS=27,0;POPAF=7.30,7.30;SAAF=0.010,0.00,5.851e-04;SAPP=1.826e-04,5.519e-04,0.999;TLOD=-1.650e+00,-2.932e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:1708,1,0:1.148e-03,5.841e-04:1709:766,1,0:942,0,0:0|1:40_T_G:40 MT 41 . C . . END=42 GT:DP:MIN_DP:TLOD 0/0:1678:1657:-3.231e+00 MT 43 . C A, . . DP=1833;ECNT=4;MBQ=27,27,0;MFRL=532,16148,0;MMQ=60,60,0;MPOS=30,0;POPAF=7.30,7.30;SAAF=0.010,0.00,5.456e-04;SAPP=1.650e-04,5.440e-04,0.999;TLOD=-1.710e+00,-2.963e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:1832,1,0:1.069e-03,5.447e-04:1833:819,1,0:1013,0,0:0|1:40_T_G:40 MT 44 . C . . END=50 GT:DP:MIN_DP:TLOD 0/0:1904:1779:-3.311e+00 @@ -210,9 +211,9 @@ MT 132 . C . . END=142 GT:DP:MIN_DP:TLOD 0/0:5872:5676:-3.776e+00 MT 143 . G A,C, . . DP=6192;ECNT=17;MBQ=29,29,2,0;MFRL=456,8242,543,0;MMQ=60,60,60,0;MPOS=37,12,0;POPAF=7.30,7.30,7.30;SAAF=0.010,0.010,9.870e-04;SAPP=1.829e-04,1.642e-04,1.000;TLOD=-2.341e+00,-3.195e+00,-3.309e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2/3:6073,6,7,0:6.092e-04,2.090e-04,1.635e-04:6086:2878,2,3,0:3195,4,4,0 MT 144 . C . . END=150 GT:DP:MIN_DP:TLOD 0/0:6275:6144:-3.763e+00 MT 151 . C A, . . DP=6495;ECNT=17;MBQ=28,13,0;MFRL=440,8341,0;MMQ=60,60,0;MPOS=14,0;POPAF=7.30,7.30;SAAF=0.010,0.010,6.260e-04;SAPP=1.238e-04,1.191e-04,1.000;TLOD=-3.505e+00,-3.507e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2:6386,4,0:1.561e-04,1.557e-04:6390:2990,2,0:3396,2,0 -MT 152 . T C,A, . . DP=6523;ECNT=17;MBQ=29,29,13,0;MFRL=291,438,16046,0;MMQ=60,60,60,0;MPOS=37,23,0;POPAF=7.30,7.30,7.30;SAAF=0.990,0.990,0.999;SAPP=0.028,0.024,0.948;TLOD=20915.39,-3.329e+00,-3.333e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2/3:7,6417,8,0:0.999,1.562e-04,1.549e-04:6432:4,3002,2,0:3,3415,6,0 +MT 152 . T C,A, . . 
AS_SB_TABLE=5,2|3487,3000|6,2|0,0;DP=6523;ECNT=17;MBQ=29,29,13,0;MFRL=291,438,16046,0;MMQ=60,60,60,0;MPOS=37,23,0;POPAF=7.30,7.30,7.30;SAAF=0.990,0.990,0.999;SAPP=0.028,0.024,0.948;TLOD=20915.39,-3.329e+00,-3.333e+00 GT:AD:AF:DP:F1R2:F2R1 0/1/2/3:7,6417,8,0:0.999,1.562e-04,1.549e-04:6432:4,3002,2,0:3,3415,6,0 MT 153 . A . . END=156 GT:DP:MIN_DP:TLOD 0/0:6486:6477:-3.814e+00 -MT 157 . T A, . . DP=6625;ECNT=17;MBQ=29,0,0;MFRL=429,0,0;MMQ=60,0,0;MPOS=0,0;POPAF=7.30,7.30;SAAF=0.00,0.00,0.00;SAPP=4.826e-05,4.803e-05,1.000;TLOD=-3.517e+00,-3.517e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:6546,0,0:1.519e-04,1.519e-04:6546:3057,0,0:3489,0,0:0|1:157_T_A:157 +MT 157 . T A, . . AS_SB_TABLE=546,6|0,0|0,0;DP=6625;ECNT=17;MBQ=29,0,0;MFRL=429,0,0;MMQ=60,0,0;MPOS=0,0;POPAF=7.30,7.30;SAAF=0.00,0.00,0.00;SAPP=4.826e-05,4.803e-05,1.000;TLOD=-3.517e+00,-3.517e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:6546,0,0:1.519e-04,1.519e-04:6546:3057,0,0:3489,0,0:0|1:157_T_A:157 MT 158 . T . . END=162 GT:DP:MIN_DP:TLOD 0/0:6523:6483:-3.765e+00 MT 163 . G GCA, . . DP=6669;ECNT=17;MBQ=25,0,0;MFRL=419,0,0;MMQ=60,0,0;MPOS=0,0;POPAF=7.30,7.30;SAAF=0.00,0.00,0.00;SAPP=4.745e-05,4.832e-05,1.000;TLOD=-3.520e+00,-3.520e+00 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS 0|1|2:6582,0,0:1.510e-04,1.510e-04:6582:3078,0,0:3504,0,0:0|1:157_T_A:157 MT 164 . C . . 
END=165 GT:DP:MIN_DP:TLOD 0/0:6517:6510:-3.807e+00 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.MT.g.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.MT.g.vcf.idx index 76d53648aa296e52bba57e03eebaee2acd1d80fe..f7cd6e69ef2cc3291ed1f4fda9bb41b78aa166ed 100644 GIT binary patch delta 57 zcmX@kc#Ls^6mMciVrp@5PNjZweoE>@jcDGNT$~IbP_XZEWWmG<>H^^=U_Jv(0ss?+ B4>14$ delta 58 zcmX@cc${&96kkeWS!PO7YMy>lQGQ~|MD=K187@u+5Xfj;lASrRUtNIF1k7iENdN%$ CKMf}U From ff2051ece7f3e4998672625a933f8bc030ae9b91 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 20 Feb 2020 09:59:31 -0500 Subject: [PATCH 51/85] fix error in LeftAlignAndTrim after rebase --- .../tools/walkers/variantutils/LeftAlignAndTrimVariants.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java index ce7a3a3fbf6..e0fc028f360 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java @@ -176,6 +176,7 @@ public class LeftAlignAndTrimVariants extends VariantWalker { private boolean suppressReferencePath = false; private VariantContextWriter vcfWriter = null; + private VCFHeader vcfHeader = null; VariantContext lastVariant; From fc585dab814eb16b42568299e6607f8c4c174277 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 20 Feb 2020 14:58:19 -0500 Subject: [PATCH 52/85] fix as splitting in left align and trim... 
--- .../utils/variant/GATKVariantContextUtils.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index 2cd41ccc726..ae0046adf80 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -1407,7 +1407,6 @@ public static List splitSomaticVariantContextToBiallelics(final } else { final List biallelics = new LinkedList<>(); - int altIndex = 1; for (final Allele alt : vc.getAlternateAlleles()) { final VariantContextBuilder builder = new VariantContextBuilder(vc); @@ -1416,23 +1415,23 @@ public static List splitSomaticVariantContextToBiallelics(final builder.alleles(alleles); // split allele specific filters - int index = vc.getAlleleIndex(alt); + int alleleIndex = vc.getAlleleIndex(alt); + int altIndex = alleleIndex - 1; // the reason we are getting as list and then joining on , is because the default getAttributeAsString for a list will add spaces between items which we don't // want to have to trim out later in the code String asfiltersStr = String.join(",", vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_FILTER_STATUS_KEY, VCFConstants.EMPTY_INFO_FIELD)); List filtersList = AnnotationUtils.decodeAnyASListWithRawDelim(asfiltersStr); - if (filtersList.size() > index) { - String filters = filtersList.get(index); + if (filtersList.size() > altIndex) { + String filters = filtersList.get(altIndex); if (filters != null && !filters.isEmpty() && !filters.equals(VCFConstants.EMPTY_INFO_FIELD) && !filters.equals((VCFConstants.PASSES_FILTERS_v4))) { AnnotationUtils.decodeAnyASList(filters).stream().forEach(filter -> builder.filter(filter)); } - builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, 
AnnotationUtils.encodeAnyASListWithRawDelim(new ArrayList<>(Arrays.asList(filtersList.get(0), filters)))); + builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, filters); } - builder.genotypes(AlleleSubsettingUtils.subsetSomaticAlleles(outputHeader, vc.getGenotypes(), alleles, new int[]{0, altIndex})); + builder.genotypes(AlleleSubsettingUtils.subsetSomaticAlleles(outputHeader, vc.getGenotypes(), alleles, new int[]{0, alleleIndex})); final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true); biallelics.add(trimmed); - altIndex++; } return biallelics; } From 343cd5f3929de9dbcf44c4cee531ae94cdc85ba2 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 21 Feb 2020 14:53:24 -0500 Subject: [PATCH 53/85] updated test for split multi allelics --- .../expected_split_with_AS_filters.vcf | 137 +++++++++--------- .../test_split_with_AS_filters.vcf | 113 ++++++++------- 2 files changed, 126 insertions(+), 124 deletions(-) diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf index f75f28963d8..7ee7d5c3c91 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf @@ -29,12 +29,12 @@ ##FORMAT= ##FORMAT= ##FORMAT= -##FORMAT= +##FORMAT= ##FORMAT= ##FORMAT= ##FORMAT= ##FORMAT= -##GATKCommandLine= +##GATKCommandLine= ##INFO= ##INFO= ##INFO= @@ -42,6 +42,8 @@ ##INFO= ##INFO= ##INFO= +##INFO= +##INFO= ##INFO= ##INFO= ##INFO= @@ -50,12 +52,12 @@ ##INFO= ##INFO= ##INFO= -##INFO= +##INFO= ##INFO= -##INFO= +##INFO= ##INFO= ##INFO= -##INFO= +##INFO= ##INFO= ##INFO= ##INFO= @@ -65,8 +67,7 @@ ##INFO= ##INFO= 
##INFO= -##INFO= -##INFO= +##INFO= ##MutectVersion=2.2 ##contig= ##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. @@ -74,64 +75,64 @@ ##source=FilterMutectCalls ##tumor_sample=01C05110 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 01C05110 -chrM 73 . A G . PASS AS_FilterStatus=.|PASS;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31148.48 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 -chrM 263 . A G . PASS AS_FilterStatus=.|PASS;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21726.37 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 -chrM 301 . A ACCC . blacklisted_site;weak_evidence AS_FilterStatus=.|weak_evidence;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.81,70.60 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:4327,161:6.241e-03:5021:1395,51:1765,86:905,3422,3,691 -chrM 301 . A ACC . blacklisted_site AS_FilterStatus=.|PASS;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.81,70.60 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:4327,533:0.040:5021:1395,226:1765,234:905,3422,3,691 -chrM 302 . A AC . blacklisted_site AS_FilterStatus=.|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/././././.:114,218:0.047:4307:28,78:24,109:101,13,473,3720 -chrM 302 . A C . 
blacklisted_site AS_FilterStatus=.|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/./././.:114,786:0.132:4307:28,364:24,386:101,13,473,3720 -chrM 302 . A ACC . blacklisted_site AS_FilterStatus=.|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/././.:114,2335:0.619:4307:28,752:24,1245:101,13,473,3720 -chrM 302 . A ACCC . blacklisted_site AS_FilterStatus=.|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/./.:114,396:0.095:4307:28,135:24,195:101,13,473,3720 -chrM 302 . A ACCCC . blacklisted_site;possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,possible_numt;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1/.:114,25:3.908e-03:4307:28,7:24,11:101,13,473,3720 -chrM 302 . A ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=.|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././././1:114,433:0.095:4307:28,153:24,150:101,13,473,3720 -chrM 310 . T C . 
base_qual;blacklisted_site AS_FilterStatus=.|base_qual;DP=5034;ECNT=4;MBQ=30,2,30;MFRL=169,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=105.51,12181.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:1,402:0.059:4598:1,65:0,82:0,1,561,4036 -chrM 310 . T TC . blacklisted_site AS_FilterStatus=.|PASS;DP=5034;ECNT=4;MBQ=30,2,30;MFRL=169,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=105.51,12181.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:1,4195:0.941:4598:1,1356:0,1919:0,1,561,4036 -chrM 499 . G A . PASS AS_FilterStatus=.|PASS;DP=8754;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24401.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8443:1.000:8453:4,3064:2,4108:2,8,3991,4452 -chrM 750 . A G . PASS AS_FilterStatus=.|PASS;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34194.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 -chrM 824 . T C . possible_numt AS_FilterStatus=.|possible_numt;DP=10560;ECNT=3;MBQ=30,30;MFRL=398,401;MMQ=60,60;MPOS=43;OCM=0;POPAF=2.40;TLOD=17.27 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10240,44:3.272e-03:10284:4925,18:5163,23:5090,5150,20,24 -chrM 827 . A G . PASS AS_FilterStatus=.|PASS;DP=10567;ECNT=3;MBQ=20,30;MFRL=387,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34471.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10279:1.000:10286:1,4951:4,5144:2,5,5119,5160 -chrM 1438 . A G . PASS AS_FilterStatus=.|PASS;DP=10800;ECNT=1;MBQ=20,30;MFRL=416,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35249.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10514:1.000:10518:0,5098:2,5234:0,4,5322,5192 -chrM 2706 . A G . PASS AS_FilterStatus=.|PASS;DP=10768;ECNT=1;MBQ=10,30;MFRL=406,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34760.84 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3,10503:1.000:10506:1,5177:0,5071:2,1,5027,5476 -chrM 3380 . G A . 
PASS AS_FilterStatus=.|PASS;DP=10769;ECNT=1;MBQ=30,30;MFRL=396,398;MMQ=60,60;MPOS=39;OCM=0;POPAF=2.40;TLOD=638.42 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10004,438:0.041:10442:5044,209:4684,218:5228,4776,226,212 -chrM 3547 . A G . PASS AS_FilterStatus=.|PASS;DP=10358;ECNT=4;MBQ=30,30;MFRL=425,394;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34395.11 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:4,10119:1.000:10123:1,4799:3,4869:0|1:3547_A_G:3547:0,4,4781,5338 -chrM 3552 . T C . PASS AS_FilterStatus=.|PASS;DP=10345;ECNT=4;MBQ=30,30;MFRL=394,391;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=174.68 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9885,184:0.016:10069:4740,89:4936,83:0|1:3547_A_G:3547:4667,5218,89,95 -chrM 3565 . A C . base_qual AS_FilterStatus=.|base_qual;DP=10330;ECNT=4;MBQ=30,10;MFRL=394,401;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=24.76 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9441,463:7.375e-03:9904:3959,41:4613,20:0|1:3547_A_G:3547:4647,4794,10,453 -chrM 3577 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=10292;ECNT=4;MBQ=30,10;MFRL=395,394;MMQ=60,60;MPOS=42;OCM=0;POPAF=2.40;TLOD=2.78 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9730,268:2.480e-03:9998:3662,30:4408,8:0|1:3547_A_G:3547:4405,5325,240,28 -chrM 4769 . A G . PASS AS_FilterStatus=.|PASS;DP=10474;ECNT=2;MBQ=20,30;MFRL=263,401;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33487.32 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10170:1.000:10179:2,5123:3,4727:5,4,5260,4910 -chrM 4820 . G A . PASS AS_FilterStatus=.|PASS;DP=10478;ECNT=2;MBQ=30,30;MFRL=397,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33895.28 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10237:1.000:10242:3,5326:1,4775:3,2,5561,4676 -chrM 4977 . T C . PASS AS_FilterStatus=.|PASS;DP=10552;ECNT=1;MBQ=20,30;MFRL=290,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33801.64 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,10283:1.000:10289:3,5136:3,4901:2,4,4601,5682 -chrM 5629 . C T . 
PASS AS_FilterStatus=.|PASS;DP=10567;ECNT=1;MBQ=30,30;MFRL=396,402;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=293.78 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10034,242:0.022:10276:5041,137:4759,100:5266,4768,128,114 -chrM 6473 . C T . PASS AS_FilterStatus=.|PASS;DP=10740;ECNT=1;MBQ=20,30;MFRL=419,395;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34747.10 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:8,10485:1.000:10493:3,5076:1,5098:6,2,5308,5177 -chrM 6722 . G A . PASS AS_FilterStatus=.|PASS;DP=10644;ECNT=1;MBQ=20,30;MFRL=219,399;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34924.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10369:1.000:10374:3,5154:2,5065:3,2,5257,5112 -chrM 7028 . C T . PASS AS_FilterStatus=.|PASS;DP=10855;ECNT=1;MBQ=10,30;MFRL=400,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35080.38 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10548:1.000:10555:2,5363:0,5048:2,5,5252,5296 -chrM 7241 . A G . PASS AS_FilterStatus=.|PASS;DP=10661;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33416.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10346:1.000:10353:3,5191:0,4856:2,5,5084,5262 -chrM 8270 . CACCCCCTCT C . PASS AS_FilterStatus=.|PASS;DP=9886;ECNT=1;MBQ=30,30;MFRL=419,406;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;RPA=2,1;RU=ACCCCCTCT;STR;TLOD=26845.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:32,8936:0.999:8968:14,4376:12,4326:24,8,4147,4789 -chrM 8736 . T C . PASS AS_FilterStatus=.|PASS;DP=10790;ECNT=1;MBQ=30,30;MFRL=399,400;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=2958.33 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9045,1453:0.137:10498:4635,731:4237,699:4550,4495,778,675 -chrM 8860 . A G . PASS AS_FilterStatus=.|PASS;DP=10877;ECNT=1;MBQ=30,30;MFRL=382,402;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=35049.07 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:11,10573:1.000:10584:4,5291:6,4983:5,6,5415,5158 -chrM 9098 . T C . PASS AS_FilterStatus=.|PASS;DP=10803;ECNT=1;MBQ=20,30;MFRL=413,396;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35204.32 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10527:1.000:10534:3,5357:2,4995:6,1,5173,5354 -chrM 9950 . T C . 
PASS AS_FilterStatus=.|PASS;DP=10681;ECNT=1;MBQ=30,30;MFRL=419,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34440.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10427:1.000:10432:2,5059:1,5128:1,4,4725,5702 -chrM 10818 . AAC A . possible_numt AS_FilterStatus=.|possible_numt;DP=10642;ECNT=1;MBQ=30,30;MFRL=398,413;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;RPA=2,1;RU=AC;STR;TLOD=124.41 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10048,73:7.193e-03:10121:5053,34:4549,37:4903,5145,38,35 -chrM 11177 . C T . PASS AS_FilterStatus=.|PASS;DP=10719;ECNT=1;MBQ=20,30;MFRL=402,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34441.45 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10433:1.000:10438:1,5387:2,4875:3,2,5589,4844 -chrM 11276 . T C . PASS AS_FilterStatus=.|PASS;DP=10644;ECNT=1;MBQ=30,30;MFRL=396,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=1691.31 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9410,943:0.089:10353:4797,486:4475,427:4746,4664,480,463 -chrM 11719 . G A . PASS AS_FilterStatus=.|PASS;DP=10841;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34853.42 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10560:1.000:10565:0,5342:1,4913:4,1,5153,5407 -chrM 13590 . G A . PASS AS_FilterStatus=.|PASS;DP=10710;ECNT=2;MBQ=25,30;MFRL=420,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34042.44 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10439:1.000:10443:2,5111:1,4846:3,1,5146,5293 -chrM 13606 . A G . PASS AS_FilterStatus=.|PASS;DP=10682;ECNT=2;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=405.26 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10056,330:0.030:10386:5060,168:4774,151:5057,4999,175,155 -chrM 14766 . C T . PASS AS_FilterStatus=.|PASS;DP=10635;ECNT=1;MBQ=10,30;MFRL=404,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33444.52 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:13,10322:1.000:10335:2,5039:0,4691:7,6,5582,4740 -chrM 15326 . A G . PASS AS_FilterStatus=.|PASS;DP=10698;ECNT=1;MBQ=15,30;MFRL=393,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34380.43 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10432:1.000:10436:1,5344:1,4710:2,2,5077,5355 -chrM 15535 . C T . 
PASS AS_FilterStatus=.|PASS;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33908.94 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 -chrM 16149 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=5406;ECNT=8;MBQ=30,10;MFRL=396,396;MMQ=60,60;MPOS=49;OCM=0;POPAF=2.40;TLOD=4.29 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5023,175:3.797e-03:5198:2065,10:2653,12:1731,3292,5,170 -chrM 16175 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=4056;ECNT=8;MBQ=25,10;MFRL=396,401;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=5.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3668,191:6.736e-03:3859:1180,16:2025,12:903,2765,6,185 -chrM 16179 . CAA C . blacklisted_site AS_FilterStatus=.|PASS;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=917.43,324.91,75.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./.:1476,647:0.268:2695:461,165:868,424:201,1275,206,1013 -chrM 16179 . CA C . blacklisted_site AS_FilterStatus=.|PASS;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=917.43,324.91,75.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/.:1476,308:0.105:2695:461,117:868,175:201,1275,206,1013 -chrM 16179 . CAAA C . blacklisted_site AS_FilterStatus=.|PASS;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=917.43,324.91,75.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1:1476,264:0.057:2695:461,48:868,189:201,1275,206,1013 -chrM 16181 . A C . base_qual AS_FilterStatus=.|base_qual;DP=3715;ECNT=8;MBQ=20,10;MFRL=396,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=67.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:2614,721:0.159:3335:698,60:1276,245:662,1952,27,694 -chrM 16182 . A C . 
blacklisted_site AS_FilterStatus=.|PASS;DP=3647;ECNT=8;MBQ=20,30;MFRL=396,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2961.98 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1200,1477:0.561:2677:230,442:606,872:245,955,137,1340 -chrM 16183 . A C . PASS AS_FilterStatus=.|PASS;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./././.:97,1775:0.701:2648:26,570:31,1047:57,40,305,2246 -chrM 16183 . A ACCC . PASS AS_FilterStatus=.|PASS;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/././.:97,413:0.163:2648:26,52:31,278:57,40,305,2246 -chrM 16183 . A ACCCC . PASS AS_FilterStatus=.|PASS;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/./.:97,244:0.101:2648:26,22:31,175:57,40,305,2246 -chrM 16183 . A ACCCCC . possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,possible_numt;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/.:97,50:5.155e-03:2648:26,2:31,35:57,40,305,2246 -chrM 16183 . A ACCCCCCCCCCCCCCCC . possible_numt AS_FilterStatus=.|possible_numt;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1:97,69:9.053e-03:2648:26,19:31,18:57,40,305,2246 -chrM 16189 . T C . 
PASS AS_FilterStatus=.|PASS;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14620.12,0.990 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:3,3422:0.997:3431:2,1023:1,2014:1,2,601,2827 -chrM 16189 . T A . possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,possible_numt;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14620.12,0.990 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:3,6:1.389e-03:3431:2,2:1,2:1,2,601,2827 -chrM 16217 . T C . PASS AS_FilterStatus=.|PASS;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14889.73 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 -chrM 16519 . T C . PASS AS_FilterStatus=.|PASS;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30690.93 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 +chrM 73 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,1|4689,4744;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31111.83 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 +chrM 263 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,6|2124,4436;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21614.00 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 +chrM 301 . A ACCC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=906,3426|3,158|0,533;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.86,70.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:4332,161:6.324e-03:5026:1397,51:1767,86:906,3426,3,691 +chrM 301 . A ACC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=906,3426|3,158|0,533;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.86,70.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:4332,533:0.040:5026:1397,226:1767,234:906,3426,3,691 +chrM 302 . A AC . 
blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/././././.:118,218:0.049:4311:31,78:25,109:103,15,473,3720 +chrM 302 . A C . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/./././.:118,786:0.134:4311:31,364:25,386:103,15,473,3720 +chrM 302 . A ACC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/././.:118,2335:0.637:4311:31,752:25,1245:103,15,473,3720 +chrM 302 . A ACCC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/./.:118,396:0.098:4311:31,135:25,195:103,15,473,3720 +chrM 302 . A ACCCC . 
blacklisted_site;possible_numt;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,strand_bias,possible_numt;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1/.:118,25:4.163e-03:4311:31,7:25,11:103,15,473,3720 +chrM 302 . A ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././././1:118,433:0.071:4311:31,153:25,150:103,15,473,3720 +chrM 310 . T C . base_qual;blacklisted_site;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=1,2|357,46|206,3990;DP=5034;ECNT=4;MBQ=20,2,30;MFRL=159,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=95.66,12037.35 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:3,403:0.049:4602:3,66:0,82:1,2,563,4036 +chrM 310 . T TC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=1,2|357,46|206,3990;DP=5034;ECNT=4;MBQ=20,2,30;MFRL=159,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=95.66,12037.35 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:3,4196:0.950:4602:3,1356:0,1920:1,2,563,4036 +chrM 499 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=2,8|3987,4458;DP=8755;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24353.87 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8445:1.000:8455:4,3065:2,4110:2,8,3987,4458 +chrM 750 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,4|5187,5182;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34231.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 +chrM 824 . T C . 
possible_numt;strand_bias AS_FilterStatus=strand_bias,possible_numt;AS_SB_TABLE=5090,5150|20,24;DP=10560;ECNT=3;MBQ=30,30;MFRL=398,401;MMQ=60,60;MPOS=43;OCM=0;POPAF=2.40;TLOD=23.47 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10240,44:3.440e-03:10284:4925,18:5163,23:5090,5150,20,24 +chrM 827 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5119,5160;DP=10567;ECNT=3;MBQ=20,30;MFRL=387,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34480.59 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10279:1.000:10286:1,4951:4,5144:2,5,5119,5160 +chrM 1438 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,4|5322,5192;DP=10800;ECNT=1;MBQ=20,30;MFRL=416,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35232.81 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10514:1.000:10518:0,5098:2,5234:0,4,5322,5192 +chrM 2706 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,1|5027,5476;DP=10768;ECNT=1;MBQ=10,30;MFRL=406,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34736.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3,10503:1.000:10506:1,5177:0,5071:2,1,5027,5476 +chrM 3380 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=5228,4776|226,212;DP=10769;ECNT=1;MBQ=30,30;MFRL=396,398;MMQ=60,60;MPOS=39;OCM=0;POPAF=2.40;TLOD=682.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10004,438:0.040:10442:5044,209:4684,218:5228,4776,226,212 +chrM 3547 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,4|4781,5338;DP=10358;ECNT=4;MBQ=30,30;MFRL=425,394;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34104.77 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:4,10119:1.000:10123:1,4799:3,4869:0|1:3547_A_G:3547:0,4,4781,5338 +chrM 3552 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=4667,5218|89,95;DP=10345;ECNT=4;MBQ=30,30;MFRL=394,391;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=190.32 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9885,184:0.017:10069:4740,89:4936,83:0|1:3547_A_G:3547:4667,5218,89,95 +chrM 3565 . A C . 
base_qual;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=4647,4794|10,453;DP=10330;ECNT=4;MBQ=30,10;MFRL=394,401;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=22.50 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9441,463:6.928e-03:9904:3959,41:4613,20:0|1:3547_A_G:3547:4647,4794,10,453 +chrM 3577 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=4405,5325|240,28;DP=10292;ECNT=4;MBQ=30,10;MFRL=395,394;MMQ=60,60;MPOS=42;OCM=0;POPAF=2.40;TLOD=1.90 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9730,268:2.324e-03:9998:3662,30:4408,8:0|1:3547_A_G:3547:4405,5325,240,28 +chrM 4769 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,4|5260,4910;DP=10474;ECNT=2;MBQ=20,30;MFRL=263,401;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33493.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10170:1.000:10179:2,5123:3,4727:5,4,5260,4910 +chrM 4820 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5561,4676;DP=10478;ECNT=2;MBQ=30,30;MFRL=397,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33900.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10237:1.000:10242:3,5326:1,4775:3,2,5561,4676 +chrM 4977 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=2,4|4601,5682;DP=10552;ECNT=1;MBQ=20,30;MFRL=290,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33756.90 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,10283:1.000:10289:3,5136:3,4901:2,4,4601,5682 +chrM 5629 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=5266,4764|129,114;DP=10567;ECNT=1;MBQ=30,30;MFRL=396,402;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=318.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10030,243:0.023:10273:5041,137:4759,101:5266,4764,129,114 +chrM 6473 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=6,2|5308,5177;DP=10740;ECNT=1;MBQ=20,30;MFRL=419,395;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34705.28 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:8,10485:1.000:10493:3,5076:1,5098:6,2,5308,5177 +chrM 6722 . G A . 
PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5257,5112;DP=10644;ECNT=1;MBQ=20,30;MFRL=219,399;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34855.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10369:1.000:10374:3,5154:2,5065:3,2,5257,5112 +chrM 7028 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5252,5296;DP=10855;ECNT=1;MBQ=10,30;MFRL=400,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35041.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10548:1.000:10555:2,5363:0,5048:2,5,5252,5296 +chrM 7241 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5084,5262;DP=10661;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33382.84 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10346:1.000:10353:3,5191:0,4856:2,5,5084,5262 +chrM 8270 . CACCCCCTCT C . PASS AS_FilterStatus=.;AS_SB_TABLE=24,8|4147,4789;DP=9886;ECNT=1;MBQ=30,30;MFRL=419,406;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;RPA=2,1;RU=ACCCCCTCT;STR;TLOD=26951.12 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:32,8936:0.999:8968:14,4376:12,4326:24,8,4147,4789 +chrM 8736 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=4550,4495|778,675;DP=10790;ECNT=1;MBQ=30,30;MFRL=399,400;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=3047.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9045,1453:0.138:10498:4635,731:4237,699:4550,4495,778,675 +chrM 8860 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,6|5415,5158;DP=10877;ECNT=1;MBQ=30,30;MFRL=382,402;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=35027.25 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:11,10573:0.999:10584:4,5291:6,4983:5,6,5415,5158 +chrM 9098 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=6,1|5173,5354;DP=10803;ECNT=1;MBQ=20,30;MFRL=413,396;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35177.72 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10527:1.000:10534:3,5357:2,4995:6,1,5173,5354 +chrM 9950 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=1,4|4725,5702;DP=10681;ECNT=1;MBQ=30,30;MFRL=419,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34370.22 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10427:1.000:10432:2,5059:1,5128:1,4,4725,5702 +chrM 10818 . AAC A . 
possible_numt AS_FilterStatus=possible_numt;AS_SB_TABLE=4903,5145|38,35;DP=10642;ECNT=1;MBQ=30,30;MFRL=398,413;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;RPA=2,1;RU=AC;STR;TLOD=125.93 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10048,73:7.543e-03:10121:5053,34:4549,37:4903,5145,38,35 +chrM 11177 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5589,4844;DP=10719;ECNT=1;MBQ=20,30;MFRL=402,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34411.63 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10433:1.000:10438:1,5387:2,4875:3,2,5589,4844 +chrM 11276 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=4746,4664|480,463;DP=10644;ECNT=1;MBQ=30,30;MFRL=396,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=1757.06 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9410,943:0.090:10353:4797,486:4475,427:4746,4664,480,463 +chrM 11719 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=4,1|5153,5407;DP=10841;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34816.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10560:1.000:10565:0,5342:1,4913:4,1,5153,5407 +chrM 13590 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=3,1|5146,5293;DP=10710;ECNT=2;MBQ=25,30;MFRL=420,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33992.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10439:1.000:10443:2,5111:1,4846:3,1,5146,5293 +chrM 13606 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5057,5000|175,155;DP=10682;ECNT=2;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=430.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10057,330:0.030:10387:5060,168:4774,151:5057,5000,175,155 +chrM 14766 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=7,6|5582,4740;DP=10635;ECNT=1;MBQ=10,30;MFRL=404,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33392.24 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:13,10322:1.000:10335:2,5039:0,4691:7,6,5582,4740 +chrM 15326 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,2|5077,5355;DP=10698;ECNT=1;MBQ=15,30;MFRL=393,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34351.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10432:1.000:10436:1,5344:1,4710:2,2,5077,5355 +chrM 15535 . C T . 
PASS AS_FilterStatus=.;AS_SB_TABLE=1,13|5374,5027;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33834.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 +chrM 16149 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=1731,3292|5,170;DP=5406;ECNT=8;MBQ=30,10;MFRL=396,396;MMQ=60,60;MPOS=49;OCM=0;POPAF=2.40;TLOD=3.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5023,175:3.489e-03:5198:2065,10:2653,12:1731,3292,5,170 +chrM 16175 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=903,2767|6,185;DP=4056;ECNT=8;MBQ=20,10;MFRL=396,401;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=5.16 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3670,191:6.729e-03:3861:1180,16:2025,12:903,2767,6,185 +chrM 16179 . CAA C . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=201,1279|160,487|44,264|2,264;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=926.74,330.32,75.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./.:1480,647:0.272:2701:462,165:869,424:201,1279,206,1015 +chrM 16179 . CA C . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=201,1279|160,487|44,264|2,264;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=926.74,330.32,75.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/.:1480,308:0.107:2701:462,117:869,175:201,1279,206,1015 +chrM 16179 . CAAA C . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=201,1279|160,487|44,264|2,264;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=926.74,330.32,75.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1:1480,266:0.057:2701:462,48:869,191:201,1279,206,1015 +chrM 16181 . A C . 
base_qual;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=662,1960|27,694;DP=3715;ECNT=8;MBQ=20,10;MFRL=396,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=65.50 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:2622,721:0.157:3343:700,60:1279,245:662,1960,27,694 +chrM 16182 . A C . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=246,976|137,1340;DP=3647;ECNT=8;MBQ=20,30;MFRL=397,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2944.26 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1222,1477:0.559:2699:233,442:621,872:246,976,137,1340 +chrM 16183 . A C . PASS AS_FilterStatus=.;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./././.:97,1779:0.698:2656:26,572:31,1049:57,40,306,2253 +chrM 16183 . A ACCC . PASS AS_FilterStatus=.;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/././.:97,413:0.165:2656:26,52:31,278:57,40,306,2253 +chrM 16183 . A ACCCC . strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/./.:97,246:0.101:2656:26,22:31,177:57,40,306,2253 +chrM 16183 . A ACCCCC . 
possible_numt;weak_evidence AS_FilterStatus=weak_evidence,possible_numt;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/.:97,51:5.638e-03:2656:26,2:31,36:57,40,306,2253 +chrM 16183 . A ACCCCCCCCCCCCCCCC . possible_numt AS_FilterStatus=possible_numt;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1:97,70:8.474e-03:2656:26,19:31,19:57,40,306,2253 +chrM 16189 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=1,2|596,2826|5,1;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14669.67,3.68 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:3,3422:0.998:3431:2,1023:1,2014:1,2,601,2827 +chrM 16189 . T A . possible_numt;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,strand_bias,possible_numt;AS_SB_TABLE=1,2|596,2826|5,1;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14669.67,3.68 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:3,6:1.168e-03:3431:2,2:1,2:1,2,601,2827 +chrM 16217 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=2,2|1114,3004;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14884.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 +chrM 16519 . T C . 
PASS AS_FilterStatus=.;AS_SB_TABLE=3,3|4970,4301;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30665.46 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf index 213c376eb03..7ec5bd00085 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf @@ -29,15 +29,17 @@ ##FORMAT= ##FORMAT= ##FORMAT= -##FORMAT= +##FORMAT= ##FORMAT= ##FORMAT= ##FORMAT= ##FORMAT= -##GATKCommandLine= -##GATKCommandLine= -##GATKCommandLine= +##GATKCommandLine= +##GATKCommandLine= +##GATKCommandLine= ##INFO= +##INFO= +##INFO= ##INFO= ##INFO= ##INFO= @@ -46,12 +48,12 @@ ##INFO= ##INFO= ##INFO= -##INFO= +##INFO= ##INFO= -##INFO= +##INFO= ##INFO= ##INFO= -##INFO= +##INFO= ##INFO= ##INFO= ##INFO= @@ -61,8 +63,7 @@ ##INFO= ##INFO= ##INFO= -##INFO= -##INFO= +##INFO= ##MutectVersion=2.2 ##contig= ##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. @@ -71,50 +72,50 @@ ##source=VariantFiltration ##tumor_sample=01C05110 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 01C05110 -chrM 73 . A G . PASS AS_FilterStatus=.|PASS;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31148.48 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 -chrM 263 . A G . 
PASS AS_FilterStatus=.|PASS;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21726.37 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 -chrM 301 . A ACCC,ACC . blacklisted_site AS_FilterStatus=.|weak_evidence|PASS;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.81,70.60 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:4327,161,533:6.241e-03,0.040:5021:1395,51,226:1765,86,234:905,3422,3,691 -chrM 302 . A AC,C,ACC,ACCC,ACCCC,ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=.|PASS|PASS|PASS|PASS|weak_evidence,possible_numt|PASS;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=397,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=218.88,432.96,4403.96,416.59,5.11,126.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3/4/5/6:114,218,786,2335,396,25,433:0.047,0.132,0.619,0.095,3.908e-03,0.095:4307:28,78,364,752,135,7,153:24,109,386,1245,195,11,150:101,13,473,3720 -chrM 310 . T C,TC . blacklisted_site AS_FilterStatus=.|base_qual|PASS;DP=5034;ECNT=4;MBQ=30,2,30;MFRL=169,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=105.51,12181.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:1,402,4195:0.059,0.941:4598:1,65,1356:0,82,1919:0,1,561,4036 -chrM 499 . G A . PASS AS_FilterStatus=.|PASS;DP=8754;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24401.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8443:1.000:8453:4,3064:2,4108:2,8,3991,4452 -chrM 750 . A G . PASS AS_FilterStatus=.|PASS;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34194.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 -chrM 824 . T C . possible_numt AS_FilterStatus=.|possible_numt;DP=10560;ECNT=3;MBQ=30,30;MFRL=398,401;MMQ=60,60;MPOS=43;OCM=0;POPAF=2.40;TLOD=17.27 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10240,44:3.272e-03:10284:4925,18:5163,23:5090,5150,20,24 -chrM 827 . A G . 
PASS AS_FilterStatus=.|PASS;DP=10567;ECNT=3;MBQ=20,30;MFRL=387,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34471.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10279:1.000:10286:1,4951:4,5144:2,5,5119,5160 -chrM 1438 . A G . PASS AS_FilterStatus=.|PASS;DP=10800;ECNT=1;MBQ=20,30;MFRL=416,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35249.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10514:1.000:10518:0,5098:2,5234:0,4,5322,5192 -chrM 2706 . A G . PASS AS_FilterStatus=.|PASS;DP=10768;ECNT=1;MBQ=10,30;MFRL=406,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34760.84 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3,10503:1.000:10506:1,5177:0,5071:2,1,5027,5476 -chrM 3380 . G A . PASS AS_FilterStatus=.|PASS;DP=10769;ECNT=1;MBQ=30,30;MFRL=396,398;MMQ=60,60;MPOS=39;OCM=0;POPAF=2.40;TLOD=638.42 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10004,438:0.041:10442:5044,209:4684,218:5228,4776,226,212 -chrM 3547 . A G . PASS AS_FilterStatus=.|PASS;DP=10358;ECNT=4;MBQ=30,30;MFRL=425,394;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34395.11 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:4,10119:1.000:10123:1,4799:3,4869:0|1:3547_A_G:3547:0,4,4781,5338 -chrM 3552 . T C . PASS AS_FilterStatus=.|PASS;DP=10345;ECNT=4;MBQ=30,30;MFRL=394,391;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=174.68 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9885,184:0.016:10069:4740,89:4936,83:0|1:3547_A_G:3547:4667,5218,89,95 -chrM 3565 . A C . base_qual AS_FilterStatus=.|base_qual;DP=10330;ECNT=4;MBQ=30,10;MFRL=394,401;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=24.76 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9441,463:7.375e-03:9904:3959,41:4613,20:0|1:3547_A_G:3547:4647,4794,10,453 -chrM 3577 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=10292;ECNT=4;MBQ=30,10;MFRL=395,394;MMQ=60,60;MPOS=42;OCM=0;POPAF=2.40;TLOD=2.78 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9730,268:2.480e-03:9998:3662,30:4408,8:0|1:3547_A_G:3547:4405,5325,240,28 -chrM 4769 . A G . 
PASS AS_FilterStatus=.|PASS;DP=10474;ECNT=2;MBQ=20,30;MFRL=263,401;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33487.32 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10170:1.000:10179:2,5123:3,4727:5,4,5260,4910 -chrM 4820 . G A . PASS AS_FilterStatus=.|PASS;DP=10478;ECNT=2;MBQ=30,30;MFRL=397,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33895.28 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10237:1.000:10242:3,5326:1,4775:3,2,5561,4676 -chrM 4977 . T C . PASS AS_FilterStatus=.|PASS;DP=10552;ECNT=1;MBQ=20,30;MFRL=290,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33801.64 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,10283:1.000:10289:3,5136:3,4901:2,4,4601,5682 -chrM 5629 . C T . PASS AS_FilterStatus=.|PASS;DP=10567;ECNT=1;MBQ=30,30;MFRL=396,402;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=293.78 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10034,242:0.022:10276:5041,137:4759,100:5266,4768,128,114 -chrM 6473 . C T . PASS AS_FilterStatus=.|PASS;DP=10740;ECNT=1;MBQ=20,30;MFRL=419,395;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34747.10 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:8,10485:1.000:10493:3,5076:1,5098:6,2,5308,5177 -chrM 6722 . G A . PASS AS_FilterStatus=.|PASS;DP=10644;ECNT=1;MBQ=20,30;MFRL=219,399;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34924.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10369:1.000:10374:3,5154:2,5065:3,2,5257,5112 -chrM 7028 . C T . PASS AS_FilterStatus=.|PASS;DP=10855;ECNT=1;MBQ=10,30;MFRL=400,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35080.38 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10548:1.000:10555:2,5363:0,5048:2,5,5252,5296 -chrM 7241 . A G . PASS AS_FilterStatus=.|PASS;DP=10661;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33416.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10346:1.000:10353:3,5191:0,4856:2,5,5084,5262 -chrM 8270 . CACCCCCTCT C . PASS AS_FilterStatus=.|PASS;DP=9886;ECNT=1;MBQ=30,30;MFRL=419,406;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;RPA=2,1;RU=ACCCCCTCT;STR;TLOD=26845.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:32,8936:0.999:8968:14,4376:12,4326:24,8,4147,4789 -chrM 8736 . T C . 
PASS AS_FilterStatus=.|PASS;DP=10790;ECNT=1;MBQ=30,30;MFRL=399,400;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=2958.33 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9045,1453:0.137:10498:4635,731:4237,699:4550,4495,778,675 -chrM 8860 . A G . PASS AS_FilterStatus=.|PASS;DP=10877;ECNT=1;MBQ=30,30;MFRL=382,402;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=35049.07 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:11,10573:1.000:10584:4,5291:6,4983:5,6,5415,5158 -chrM 9098 . T C . PASS AS_FilterStatus=.|PASS;DP=10803;ECNT=1;MBQ=20,30;MFRL=413,396;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35204.32 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10527:1.000:10534:3,5357:2,4995:6,1,5173,5354 -chrM 9950 . T C . PASS AS_FilterStatus=.|PASS;DP=10681;ECNT=1;MBQ=30,30;MFRL=419,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34440.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10427:1.000:10432:2,5059:1,5128:1,4,4725,5702 -chrM 10818 . AAC A . possible_numt AS_FilterStatus=.|possible_numt;DP=10642;ECNT=1;MBQ=30,30;MFRL=398,413;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;RPA=2,1;RU=AC;STR;TLOD=124.41 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10048,73:7.193e-03:10121:5053,34:4549,37:4903,5145,38,35 -chrM 11177 . C T . PASS AS_FilterStatus=.|PASS;DP=10719;ECNT=1;MBQ=20,30;MFRL=402,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34441.45 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10433:1.000:10438:1,5387:2,4875:3,2,5589,4844 -chrM 11276 . T C . PASS AS_FilterStatus=.|PASS;DP=10644;ECNT=1;MBQ=30,30;MFRL=396,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=1691.31 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9410,943:0.089:10353:4797,486:4475,427:4746,4664,480,463 -chrM 11719 . G A . PASS AS_FilterStatus=.|PASS;DP=10841;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34853.42 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10560:1.000:10565:0,5342:1,4913:4,1,5153,5407 -chrM 13590 . G A . PASS AS_FilterStatus=.|PASS;DP=10710;ECNT=2;MBQ=25,30;MFRL=420,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34042.44 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10439:1.000:10443:2,5111:1,4846:3,1,5146,5293 -chrM 13606 . A G . 
PASS AS_FilterStatus=.|PASS;DP=10682;ECNT=2;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=405.26 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10056,330:0.030:10386:5060,168:4774,151:5057,4999,175,155 -chrM 14766 . C T . PASS AS_FilterStatus=.|PASS;DP=10635;ECNT=1;MBQ=10,30;MFRL=404,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33444.52 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:13,10322:1.000:10335:2,5039:0,4691:7,6,5582,4740 -chrM 15326 . A G . PASS AS_FilterStatus=.|PASS;DP=10698;ECNT=1;MBQ=15,30;MFRL=393,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34380.43 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10432:1.000:10436:1,5344:1,4710:2,2,5077,5355 -chrM 15535 . C T . PASS AS_FilterStatus=.|PASS;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33908.94 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 -chrM 16149 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=5406;ECNT=8;MBQ=30,10;MFRL=396,396;MMQ=60,60;MPOS=49;OCM=0;POPAF=2.40;TLOD=4.29 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5023,175:3.797e-03:5198:2065,10:2653,12:1731,3292,5,170 -chrM 16175 . A C . base_qual;weak_evidence AS_FilterStatus=.|weak_evidence,base_qual;DP=4056;ECNT=8;MBQ=25,10;MFRL=396,401;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=5.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3668,191:6.736e-03:3859:1180,16:2025,12:903,2765,6,185 -chrM 16179 . CAAA CA,CAA,C . blacklisted_site AS_FilterStatus=.|PASS|PASS|PASS;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=917.43,324.91,75.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3:1476,647,308,264:0.268,0.105,0.057:2695:461,165,117,48:868,424,175,189:201,1275,206,1013 -chrM 16181 . A C . base_qual AS_FilterStatus=.|base_qual;DP=3715;ECNT=8;MBQ=20,10;MFRL=396,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=67.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:2614,721:0.159:3335:698,60:1276,245:662,1952,27,694 -chrM 16182 . A C . 
blacklisted_site AS_FilterStatus=.|PASS;DP=3647;ECNT=8;MBQ=20,30;MFRL=396,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2961.98 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1200,1477:0.561:2677:230,442:606,872:245,955,137,1340 -chrM 16183 . A C,ACCC,ACCCC,ACCCCC,ACCCCCCCCCCCCCCCC . PASS AS_FilterStatus=.|PASS|PASS|PASS|weak_evidence,possible_numt|possible_numt;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,380,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4093.33,489.71,190.91,1.38,11.04 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3/4/5:97,1775,413,244,50,69:0.701,0.163,0.101,5.155e-03,9.053e-03:2648:26,570,52,22,2,19:31,1047,278,175,35,18:57,40,305,2246 -chrM 16189 . T C,A . PASS AS_FilterStatus=.|PASS|weak_evidence,possible_numt;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14620.12,0.990 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:3,3422,6:0.997,1.389e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 -chrM 16217 . T C . PASS AS_FilterStatus=.|PASS;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14889.73 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 -chrM 16519 . T C . PASS AS_FilterStatus=.|PASS;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30690.93 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 +chrM 73 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,1|4689,4744;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31111.83 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 +chrM 263 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,6|2124,4436;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21614.00 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 +chrM 301 . A ACCC,ACC . 
blacklisted_site;strand_bias AS_FilterStatus=strand_bias|strand_bias;AS_SB_TABLE=906,3426|3,158|0,533;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.86,70.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:4332,161,533:6.324e-03,0.040:5026:1397,51,226:1767,86,234:906,3426,3,691 +chrM 302 . A AC,C,ACC,ACCC,ACCCC,ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=strand_bias|strand_bias|strand_bias|strand_bias|weak_evidence,strand_bias,possible_numt|.;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3/4/5/6:118,218,786,2335,396,25,433:0.049,0.134,0.637,0.098,4.163e-03,0.071:4311:31,78,364,752,135,7,153:25,109,386,1245,195,11,150:103,15,473,3720 +chrM 310 . T C,TC . blacklisted_site;strand_bias AS_FilterStatus=base_qual,strand_bias|strand_bias;AS_SB_TABLE=1,2|357,46|206,3990;DP=5034;ECNT=4;MBQ=20,2,30;MFRL=159,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=95.66,12037.35 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:3,403,4196:0.049,0.950:4602:3,66,1356:0,82,1920:1,2,563,4036 +chrM 499 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=2,8|3987,4458;DP=8755;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24353.87 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8445:1.000:8455:4,3065:2,4110:2,8,3987,4458 +chrM 750 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,4|5187,5182;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34231.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 +chrM 824 . T C . 
possible_numt;strand_bias AS_FilterStatus=strand_bias,possible_numt;AS_SB_TABLE=5090,5150|20,24;DP=10560;ECNT=3;MBQ=30,30;MFRL=398,401;MMQ=60,60;MPOS=43;OCM=0;POPAF=2.40;TLOD=23.47 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10240,44:3.440e-03:10284:4925,18:5163,23:5090,5150,20,24 +chrM 827 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5119,5160;DP=10567;ECNT=3;MBQ=20,30;MFRL=387,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34480.59 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10279:1.000:10286:1,4951:4,5144:2,5,5119,5160 +chrM 1438 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,4|5322,5192;DP=10800;ECNT=1;MBQ=20,30;MFRL=416,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35232.81 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10514:1.000:10518:0,5098:2,5234:0,4,5322,5192 +chrM 2706 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,1|5027,5476;DP=10768;ECNT=1;MBQ=10,30;MFRL=406,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34736.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3,10503:1.000:10506:1,5177:0,5071:2,1,5027,5476 +chrM 3380 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=5228,4776|226,212;DP=10769;ECNT=1;MBQ=30,30;MFRL=396,398;MMQ=60,60;MPOS=39;OCM=0;POPAF=2.40;TLOD=682.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10004,438:0.040:10442:5044,209:4684,218:5228,4776,226,212 +chrM 3547 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,4|4781,5338;DP=10358;ECNT=4;MBQ=30,30;MFRL=425,394;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34104.77 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:4,10119:1.000:10123:1,4799:3,4869:0|1:3547_A_G:3547:0,4,4781,5338 +chrM 3552 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=4667,5218|89,95;DP=10345;ECNT=4;MBQ=30,30;MFRL=394,391;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=190.32 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9885,184:0.017:10069:4740,89:4936,83:0|1:3547_A_G:3547:4667,5218,89,95 +chrM 3565 . A C . 
base_qual;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=4647,4794|10,453;DP=10330;ECNT=4;MBQ=30,10;MFRL=394,401;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=22.50 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9441,463:6.928e-03:9904:3959,41:4613,20:0|1:3547_A_G:3547:4647,4794,10,453 +chrM 3577 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=4405,5325|240,28;DP=10292;ECNT=4;MBQ=30,10;MFRL=395,394;MMQ=60,60;MPOS=42;OCM=0;POPAF=2.40;TLOD=1.90 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9730,268:2.324e-03:9998:3662,30:4408,8:0|1:3547_A_G:3547:4405,5325,240,28 +chrM 4769 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,4|5260,4910;DP=10474;ECNT=2;MBQ=20,30;MFRL=263,401;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33493.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10170:1.000:10179:2,5123:3,4727:5,4,5260,4910 +chrM 4820 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5561,4676;DP=10478;ECNT=2;MBQ=30,30;MFRL=397,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33900.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10237:1.000:10242:3,5326:1,4775:3,2,5561,4676 +chrM 4977 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=2,4|4601,5682;DP=10552;ECNT=1;MBQ=20,30;MFRL=290,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33756.90 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,10283:1.000:10289:3,5136:3,4901:2,4,4601,5682 +chrM 5629 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=5266,4764|129,114;DP=10567;ECNT=1;MBQ=30,30;MFRL=396,402;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=318.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10030,243:0.023:10273:5041,137:4759,101:5266,4764,129,114 +chrM 6473 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=6,2|5308,5177;DP=10740;ECNT=1;MBQ=20,30;MFRL=419,395;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34705.28 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:8,10485:1.000:10493:3,5076:1,5098:6,2,5308,5177 +chrM 6722 . G A . 
PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5257,5112;DP=10644;ECNT=1;MBQ=20,30;MFRL=219,399;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34855.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10369:1.000:10374:3,5154:2,5065:3,2,5257,5112 +chrM 7028 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5252,5296;DP=10855;ECNT=1;MBQ=10,30;MFRL=400,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35041.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10548:1.000:10555:2,5363:0,5048:2,5,5252,5296 +chrM 7241 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5084,5262;DP=10661;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33382.84 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10346:1.000:10353:3,5191:0,4856:2,5,5084,5262 +chrM 8270 . CACCCCCTCT C . PASS AS_FilterStatus=.;AS_SB_TABLE=24,8|4147,4789;DP=9886;ECNT=1;MBQ=30,30;MFRL=419,406;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;RPA=2,1;RU=ACCCCCTCT;STR;TLOD=26951.12 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:32,8936:0.999:8968:14,4376:12,4326:24,8,4147,4789 +chrM 8736 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=4550,4495|778,675;DP=10790;ECNT=1;MBQ=30,30;MFRL=399,400;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=3047.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9045,1453:0.138:10498:4635,731:4237,699:4550,4495,778,675 +chrM 8860 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,6|5415,5158;DP=10877;ECNT=1;MBQ=30,30;MFRL=382,402;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=35027.25 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:11,10573:0.999:10584:4,5291:6,4983:5,6,5415,5158 +chrM 9098 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=6,1|5173,5354;DP=10803;ECNT=1;MBQ=20,30;MFRL=413,396;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35177.72 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10527:1.000:10534:3,5357:2,4995:6,1,5173,5354 +chrM 9950 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=1,4|4725,5702;DP=10681;ECNT=1;MBQ=30,30;MFRL=419,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34370.22 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10427:1.000:10432:2,5059:1,5128:1,4,4725,5702 +chrM 10818 . AAC A . 
possible_numt AS_FilterStatus=possible_numt;AS_SB_TABLE=4903,5145|38,35;DP=10642;ECNT=1;MBQ=30,30;MFRL=398,413;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;RPA=2,1;RU=AC;STR;TLOD=125.93 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10048,73:7.543e-03:10121:5053,34:4549,37:4903,5145,38,35 +chrM 11177 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5589,4844;DP=10719;ECNT=1;MBQ=20,30;MFRL=402,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34411.63 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10433:1.000:10438:1,5387:2,4875:3,2,5589,4844 +chrM 11276 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=4746,4664|480,463;DP=10644;ECNT=1;MBQ=30,30;MFRL=396,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=1757.06 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9410,943:0.090:10353:4797,486:4475,427:4746,4664,480,463 +chrM 11719 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=4,1|5153,5407;DP=10841;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34816.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10560:1.000:10565:0,5342:1,4913:4,1,5153,5407 +chrM 13590 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=3,1|5146,5293;DP=10710;ECNT=2;MBQ=25,30;MFRL=420,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33992.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10439:1.000:10443:2,5111:1,4846:3,1,5146,5293 +chrM 13606 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5057,5000|175,155;DP=10682;ECNT=2;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=430.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10057,330:0.030:10387:5060,168:4774,151:5057,5000,175,155 +chrM 14766 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=7,6|5582,4740;DP=10635;ECNT=1;MBQ=10,30;MFRL=404,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33392.24 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:13,10322:1.000:10335:2,5039:0,4691:7,6,5582,4740 +chrM 15326 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,2|5077,5355;DP=10698;ECNT=1;MBQ=15,30;MFRL=393,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34351.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10432:1.000:10436:1,5344:1,4710:2,2,5077,5355 +chrM 15535 . C T . 
PASS AS_FilterStatus=.;AS_SB_TABLE=1,13|5374,5027;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33834.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 +chrM 16149 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=1731,3292|5,170;DP=5406;ECNT=8;MBQ=30,10;MFRL=396,396;MMQ=60,60;MPOS=49;OCM=0;POPAF=2.40;TLOD=3.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5023,175:3.489e-03:5198:2065,10:2653,12:1731,3292,5,170 +chrM 16175 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=903,2767|6,185;DP=4056;ECNT=8;MBQ=20,10;MFRL=396,401;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=5.16 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3670,191:6.729e-03:3861:1180,16:2025,12:903,2767,6,185 +chrM 16179 . CAAA CA,CAA,C . blacklisted_site AS_FilterStatus=.|.|strand_bias;AS_SB_TABLE=201,1279|160,487|44,264|2,264;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=926.74,330.32,75.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3:1480,647,308,266:0.272,0.107,0.057:2701:462,165,117,48:869,424,175,191:201,1279,206,1015 +chrM 16181 . A C . base_qual;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=662,1960|27,694;DP=3715;ECNT=8;MBQ=20,10;MFRL=396,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=65.50 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:2622,721:0.157:3343:700,60:1279,245:662,1960,27,694 +chrM 16182 . A C . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=246,976|137,1340;DP=3647;ECNT=8;MBQ=20,30;MFRL=397,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2944.26 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1222,1477:0.559:2699:233,442:621,872:246,976,137,1340 +chrM 16183 . A C,ACCC,ACCCC,ACCCCC,ACCCCCCCCCCCCCCCC . 
PASS AS_FilterStatus=.|.|strand_bias|weak_evidence,possible_numt|possible_numt;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3/4/5:97,1779,413,246,51,70:0.698,0.165,0.101,5.638e-03,8.474e-03:2656:26,572,52,22,2,19:31,1049,278,177,36,19:57,40,306,2253 +chrM 16189 . T C,A . PASS AS_FilterStatus=.|weak_evidence,strand_bias,possible_numt;AS_SB_TABLE=1,2|596,2826|5,1;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14669.67,3.68 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:3,3422,6:0.998,1.168e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 +chrM 16217 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=2,2|1114,3004;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14884.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 +chrM 16519 . T C . 
PASS AS_FilterStatus=.;AS_SB_TABLE=3,3|4970,4301;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30665.46 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 From 4697b02c148ff79da35fbbfb5b5ba9761c7317ab Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 21 Feb 2020 16:56:59 -0500 Subject: [PATCH 54/85] needed to update another test file --- .../hellbender/tools/mutect/mito/filtered.vcf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf index c72b2c7b6ab..1cb074133f7 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf @@ -60,9 +60,9 @@ ##source=FilterMutectCalls ##source=Mutect2 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chrM 152 . T C . PASS AS_FilterStatus=.|PASS;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true -chrM 263 . A G . numt_chimera AS_FilterStatus=.|PASS;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 -chrM 301 . A AC . low_allele_frac;possible_numt;weak_evidence AS_FilterStatus=.|weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 -chrM 302 . A AC,C,ACC . 
PASS AS_FilterStatus=.|PASS|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 -chrM 310 . T TC . PASS AS_FilterStatus=.|PASS;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 -chrM 750 . A G . PASS AS_FilterStatus=.|PASS;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true +chrM 152 . T C . PASS AS_FilterStatus=.;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +chrM 263 . A G . numt_chimera AS_FilterStatus=.;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +chrM 301 . A AC . low_allele_frac;possible_numt;weak_evidence AS_FilterStatus=weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +chrM 302 . A AC,C,ACC . PASS AS_FilterStatus=.|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 310 . T TC . 
PASS AS_FilterStatus=.;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 750 . A G . PASS AS_FilterStatus=.;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true From 94fa473fb9fb7aecf7e59a649dfc8e7f09d9b8dc Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Wed, 4 Mar 2020 18:11:50 -0500 Subject: [PATCH 55/85] fix split multiallelics to work for all info fields --- .../allelespecific/StrandBiasUtils.java | 2 +- .../variant/GATKVariantContextUtils.java | 91 ++++++++++++++++--- .../MTLowHeteroplasmyFilterToolTest.java | 6 +- .../expected_split_with_AS_filters.vcf | 40 ++++---- 4 files changed, 102 insertions(+), 37 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java index 942dd45e998..90386d5e342 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java @@ -45,7 +45,7 @@ protected static String makeRawAnnotationString(final List vcAlleles, fi } - protected static String encode(List alleleValues) { + public static String encode(List alleleValues) { return String.join(",", alleleValues.stream().map(i -> i.toString()).collect(Collectors.toList())); } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index ae0046adf80..66b46da831f 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ 
b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -3,6 +3,7 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.Locatable; import htsjdk.tribble.TribbleException; +import htsjdk.utils.ValidationUtils; import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.writer.Options; import htsjdk.variant.variantcontext.writer.VariantContextWriter; @@ -10,11 +11,13 @@ import htsjdk.variant.vcf.*; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.Validate; import org.apache.commons.lang3.tuple.MutablePair; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; +import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils; import org.broadinstitute.hellbender.tools.walkers.genotyper.*; import org.broadinstitute.hellbender.utils.*; import org.broadinstitute.hellbender.utils.param.ParamUtils; @@ -1407,28 +1410,29 @@ public static List splitSomaticVariantContextToBiallelics(final } else { final List biallelics = new LinkedList<>(); + List attrsSpecialFormats = new ArrayList(Arrays.asList(GATKVCFConstants.AS_FILTER_STATUS_KEY, GATKVCFConstants.AS_SB_TABLE_KEY)); + List> attributesByAllele = splitAttributesIntoPerAlleleLists(vc, attrsSpecialFormats, outputHeader); + splitASSBTable(vc, attributesByAllele); + splitASFilters(vc, attributesByAllele); + + ListIterator> attributesByAlleleIterator = attributesByAllele.listIterator(); + for (final Allele alt : vc.getAlternateAlleles()) { final VariantContextBuilder builder = new VariantContextBuilder(vc); // make biallelic alleles final List alleles = Arrays.asList(vc.getReference(), alt); builder.alleles(alleles); + Map attributes = attributesByAlleleIterator.next(); + builder.attributes(attributes); - // 
split allele specific filters - int alleleIndex = vc.getAlleleIndex(alt); - int altIndex = alleleIndex - 1; - // the reason we are getting as list and then joining on , is because the default getAttributeAsString for a list will add spaces between items which we don't - // want to have to trim out later in the code - String asfiltersStr = String.join(",", vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_FILTER_STATUS_KEY, VCFConstants.EMPTY_INFO_FIELD)); - List filtersList = AnnotationUtils.decodeAnyASListWithRawDelim(asfiltersStr); - if (filtersList.size() > altIndex) { - String filters = filtersList.get(altIndex); - if (filters != null && !filters.isEmpty() && !filters.equals(VCFConstants.EMPTY_INFO_FIELD) && !filters.equals((VCFConstants.PASSES_FILTERS_v4))) { - AnnotationUtils.decodeAnyASList(filters).stream().forEach(filter -> builder.filter(filter)); - } - builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, filters); + // now add the allele specific filters to the variant context + String filters = (String) attributes.get(GATKVCFConstants.AS_FILTER_STATUS_KEY); + if (filters != null && !filters.isEmpty() && !filters.equals(VCFConstants.EMPTY_INFO_FIELD) && !filters.equals((VCFConstants.PASSES_FILTERS_v4))) { + AnnotationUtils.decodeAnyASList(filters).stream().forEach(filter -> builder.filter(filter)); } + int alleleIndex = vc.getAlleleIndex(alt); builder.genotypes(AlleleSubsettingUtils.subsetSomaticAlleles(outputHeader, vc.getGenotypes(), alleles, new int[]{0, alleleIndex})); final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true); biallelics.add(trimmed); @@ -1437,6 +1441,67 @@ public static List splitSomaticVariantContextToBiallelics(final } } + public static void splitASSBTable(VariantContext vc, List> attrsByAllele) { + List sbs = StrandBiasUtils.getSBsForAlleles(vc).stream().map(ints -> StrandBiasUtils.encode(ints)).collect(Collectors.toList()); + new IndexRange(1, sbs.size()).forEach(i -> { + String newattrs = 
String.join(AnnotationUtils.ALLELE_SPECIFIC_RAW_DELIM, new ArrayList(Arrays.asList(sbs.get(0), sbs.get(i)))); + attrsByAllele.get(i - 1).put(GATKVCFConstants.AS_SB_TABLE_KEY, newattrs); + }); + } + + public static void splitASFilters(VariantContext vc, List> attrsByAllele) { + // the reason we are getting as list and then joining on , is because the default getAttributeAsString for a list will add spaces between items which we don't + // want to have to trim out later in the code + String asfiltersStr = String.join(",", vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_FILTER_STATUS_KEY, VCFConstants.EMPTY_INFO_FIELD)); + List filtersList = AnnotationUtils.decodeAnyASListWithRawDelim(asfiltersStr); + new IndexRange(0, filtersList.size()).forEach(i -> attrsByAllele.get(i).put(GATKVCFConstants.AS_FILTER_STATUS_KEY, filtersList.get(i))); + } + + public static List> splitAttributesIntoPerAlleleLists(VariantContext vc, List skipAttributes, VCFHeader outputHeader) { + List> results = new ArrayList<>(vc.getNAlleles()-1); + vc.getAlternateAlleles().forEach(alt -> results.add(new HashMap<>())); + + Map attributes = vc.getAttributes(); + attributes.entrySet().stream().filter(entry -> !skipAttributes.contains(entry.getKey())).forEachOrdered(entry -> { + String key = entry.getKey(); + // default to unbounded in case header is not found + VCFHeaderLineCount countType = VCFHeaderLineCount.UNBOUNDED; + try { + VCFInfoHeaderLine header = outputHeader.getInfoHeaderLine(key); + countType = header.getCountType(); + } catch (IllegalStateException ex) { + // this happens for DP if we use GATKVCFHeaderLines.getInfoLine(key) + // shouldn't happen now that we use the generated output header + logger.warn("Could not find header info for key " + key); + } + // override count type for this attribute + if (key.equals(GATKVCFConstants.REPEATS_PER_ALLELE_KEY)) { + countType = VCFHeaderLineCount.R; + } + List attr; + switch (countType) { + case A: + attr = 
vc.getCommonInfo().getAttributeAsList(key); + ValidationUtils.validateArg(attr.size() == results.size(), "Incorrect attribute size for " + key); + new IndexRange(0, attr.size()).forEach(i -> results.get(i).put(key, attr.get(i))); + break; + case R: + attr = vc.getCommonInfo().getAttributeAsList(key); + ValidationUtils.validateArg(attr.size() == vc.getNAlleles(), "Incorrect attribute size for " + key); + new IndexRange(1, attr.size()).forEach(i -> { + List newattrs = new ArrayList(Arrays.asList(attr.get(0), attr.get(i))); + results.get(i-1).put(key, newattrs); + }); + break; + default: + results.forEach(altMap -> altMap.put(key, entry.getValue())); + + } + + }); + return results; + } + /** * Split variant context into its biallelic components if there are more than 2 alleles *

    diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java index 08401ebcbe9..746516e93db 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java @@ -23,8 +23,8 @@ public void testLowHetVariantsFiltered() { final File outputFile = createTempFile("low-het-test", ".vcf"); final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() .addReference(MITO_REF.getAbsolutePath()) - .addArgument(StandardArgumentDefinitions.VARIANT_SHORT_NAME, NA12878_MITO_FILTERED_VCF.getAbsolutePath()) - .addArgument(MTLowHeteroplasmyFilterTool.MIN_LOW_HET_SITES_LONG_NAME, 0) + .add(StandardArgumentDefinitions.VARIANT_SHORT_NAME, NA12878_MITO_FILTERED_VCF.getAbsolutePath()) + .add(MTLowHeteroplasmyFilterTool.MIN_LOW_HET_SITES_LONG_NAME, 0) .addOutput(outputFile); runCommandLine(argsBuilder); Set variants = VariantContextTestUtils.streamVcf(outputFile) @@ -37,7 +37,7 @@ public void testNoLowHetVariantsFiltered() { final File outputFile = createTempFile("no-low-het-test", ".vcf"); final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() .addReference(MITO_REF.getAbsolutePath()) - .addArgument(StandardArgumentDefinitions.VARIANT_SHORT_NAME, NA12878_MITO_FILTERED_VCF.getAbsolutePath()) + .add(StandardArgumentDefinitions.VARIANT_SHORT_NAME, NA12878_MITO_FILTERED_VCF.getAbsolutePath()) .addOutput(outputFile); runCommandLine(argsBuilder); Assert.assertTrue(VariantContextTestUtils.streamVcf(outputFile) diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf 
b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf index 7ee7d5c3c91..79831aa7fa2 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf @@ -77,16 +77,16 @@ #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 01C05110 chrM 73 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,1|4689,4744;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31111.83 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 chrM 263 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,6|2124,4436;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21614.00 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 -chrM 301 . A ACCC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=906,3426|3,158|0,533;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.86,70.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:4332,161:6.324e-03:5026:1397,51:1767,86:906,3426,3,691 -chrM 301 . A ACC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=906,3426|3,158|0,533;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.86,70.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:4332,533:0.040:5026:1397,226:1767,234:906,3426,3,691 -chrM 302 . A AC . 
blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/././././.:118,218:0.049:4311:31,78:25,109:103,15,473,3720 -chrM 302 . A C . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/./././.:118,786:0.134:4311:31,364:25,386:103,15,473,3720 -chrM 302 . A ACC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/././.:118,2335:0.637:4311:31,752:25,1245:103,15,473,3720 -chrM 302 . A ACCC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/./.:118,396:0.098:4311:31,135:25,195:103,15,473,3720 -chrM 302 . A ACCCC . 
blacklisted_site;possible_numt;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,strand_bias,possible_numt;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1/.:118,25:4.163e-03:4311:31,7:25,11:103,15,473,3720 -chrM 302 . A ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././././1:118,433:0.071:4311:31,153:25,150:103,15,473,3720 -chrM 310 . T C . base_qual;blacklisted_site;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=1,2|357,46|206,3990;DP=5034;ECNT=4;MBQ=20,2,30;MFRL=159,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=95.66,12037.35 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:3,403:0.049:4602:3,66:0,82:1,2,563,4036 -chrM 310 . T TC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=1,2|357,46|206,3990;DP=5034;ECNT=4;MBQ=20,2,30;MFRL=159,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=95.66,12037.35 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:3,4196:0.950:4602:3,1356:0,1920:1,2,563,4036 +chrM 301 . A ACCC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=906,3426|3,158;DP=5348;ECNT=4;MBQ=20,20;MFRL=393,387;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=7.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:4332,161:6.324e-03:5026:1397,51:1767,86:906,3426,3,691 +chrM 301 . A ACC . 
blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=906,3426|0,533;DP=5348;ECNT=4;MBQ=20,20;MFRL=393,398;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=70.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:4332,533:0.040:5026:1397,226:1767,234:906,3426,3,691 +chrM 302 . A AC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|2,216;DP=5359;ECNT=4;MBQ=10,20;MFRL=396,396;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=219.45 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/././././.:118,218:0.049:4311:31,78:25,109:103,15,473,3720 +chrM 302 . A C . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|13,773;DP=5359;ECNT=4;MBQ=10,30;MFRL=396,396;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=423.29 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/./././.:118,786:0.134:4311:31,364:25,386:103,15,473,3720 +chrM 302 . A ACC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|20,2315;DP=5359;ECNT=4;MBQ=10,20;MFRL=396,393;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=4415.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/././.:118,2335:0.637:4311:31,752:25,1245:103,15,473,3720 +chrM 302 . A ACCC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|4,392;DP=5359;ECNT=4;MBQ=10,20;MFRL=396,387;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=421.50 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/./.:118,396:0.098:4311:31,135:25,195:103,15,473,3720 +chrM 302 . A ACCCC . blacklisted_site;possible_numt;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,strand_bias,possible_numt;AS_SB_TABLE=103,15|1,24;DP=5359;ECNT=4;MBQ=10,20;MFRL=396,379;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=5.38 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1/.:118,25:4.163e-03:4311:31,7:25,11:103,15,473,3720 +chrM 302 . A ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=103,15|433,0;DP=5359;ECNT=4;MBQ=10,20;MFRL=396,394;MMQ=60,60;MPOS=8;OCM=0;POPAF=2.40;TLOD=111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././././1:118,433:0.071:4311:31,153:25,150:103,15,473,3720 +chrM 310 . T C . 
base_qual;blacklisted_site;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=1,2|357,46;DP=5034;ECNT=4;MBQ=20,2;MFRL=159,398;MMQ=60,60;MPOS=4;OCM=0;POPAF=2.40;TLOD=95.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:3,403:0.049:4602:3,66:0,82:1,2,563,4036 +chrM 310 . T TC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=1,2|206,3990;DP=5034;ECNT=4;MBQ=20,30;MFRL=159,393;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=12037.35 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:3,4196:0.950:4602:3,1356:0,1920:1,2,563,4036 chrM 499 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=2,8|3987,4458;DP=8755;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24353.87 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8445:1.000:8455:4,3065:2,4110:2,8,3987,4458 chrM 750 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,4|5187,5182;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34231.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 chrM 824 . T C . possible_numt;strand_bias AS_FilterStatus=strand_bias,possible_numt;AS_SB_TABLE=5090,5150|20,24;DP=10560;ECNT=3;MBQ=30,30;MFRL=398,401;MMQ=60,60;MPOS=43;OCM=0;POPAF=2.40;TLOD=23.47 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10240,44:3.440e-03:10284:4925,18:5163,23:5090,5150,20,24 @@ -122,17 +122,17 @@ chrM 15326 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,2|5077,5355;DP=10698;ECN chrM 15535 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=1,13|5374,5027;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33834.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 chrM 16149 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=1731,3292|5,170;DP=5406;ECNT=8;MBQ=30,10;MFRL=396,396;MMQ=60,60;MPOS=49;OCM=0;POPAF=2.40;TLOD=3.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5023,175:3.489e-03:5198:2065,10:2653,12:1731,3292,5,170 chrM 16175 . A C . 
base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=903,2767|6,185;DP=4056;ECNT=8;MBQ=20,10;MFRL=396,401;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=5.16 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3670,191:6.729e-03:3861:1180,16:2025,12:903,2767,6,185 -chrM 16179 . CAA C . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=201,1279|160,487|44,264|2,264;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=926.74,330.32,75.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./.:1480,647:0.272:2701:462,165:869,424:201,1279,206,1015 -chrM 16179 . CA C . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=201,1279|160,487|44,264|2,264;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=926.74,330.32,75.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/.:1480,308:0.107:2701:462,117:869,175:201,1279,206,1015 -chrM 16179 . CAAA C . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=201,1279|160,487|44,264|2,264;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=926.74,330.32,75.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1:1480,266:0.057:2701:462,48:869,191:201,1279,206,1015 +chrM 16179 . CAA C . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=201,1279|160,487;DP=3867;ECNT=8;MBQ=30,30;MFRL=397,396;MMQ=60,60;MPOS=29;OCM=0;POPAF=2.40;RPA=4,2;RU=A;STR;TLOD=926.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./.:1480,647:0.272:2701:462,165:869,424:201,1279,206,1015 +chrM 16179 . CA C . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=201,1279|44,264;DP=3867;ECNT=8;MBQ=30,30;MFRL=397,397;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;RPA=4,3;RU=A;STR;TLOD=330.32 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/.:1480,308:0.107:2701:462,117:869,175:201,1279,206,1015 +chrM 16179 . CAAA C . 
blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=201,1279|2,264;DP=3867;ECNT=8;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=38;OCM=0;POPAF=2.40;RPA=4,1;RU=A;STR;TLOD=75.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1:1480,266:0.057:2701:462,48:869,191:201,1279,206,1015 chrM 16181 . A C . base_qual;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=662,1960|27,694;DP=3715;ECNT=8;MBQ=20,10;MFRL=396,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=65.50 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:2622,721:0.157:3343:700,60:1279,245:662,1960,27,694 chrM 16182 . A C . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=246,976|137,1340;DP=3647;ECNT=8;MBQ=20,30;MFRL=397,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2944.26 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1222,1477:0.559:2699:233,442:621,872:246,976,137,1340 -chrM 16183 . A C . PASS AS_FilterStatus=.;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./././.:97,1779:0.698:2656:26,572:31,1049:57,40,306,2253 -chrM 16183 . A ACCC . PASS AS_FilterStatus=.;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/././.:97,413:0.165:2656:26,52:31,278:57,40,306,2253 -chrM 16183 . A ACCCC . strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/./.:97,246:0.101:2656:26,22:31,177:57,40,306,2253 -chrM 16183 . A ACCCCC . 
possible_numt;weak_evidence AS_FilterStatus=weak_evidence,possible_numt;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/.:97,51:5.638e-03:2656:26,2:31,36:57,40,306,2253 -chrM 16183 . A ACCCCCCCCCCCCCCCC . possible_numt AS_FilterStatus=possible_numt;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1:97,70:8.474e-03:2656:26,19:31,19:57,40,306,2253 -chrM 16189 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=1,2|596,2826|5,1;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14669.67,3.68 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:3,3422:0.998:3431:2,1023:1,2014:1,2,601,2827 -chrM 16189 . T A . possible_numt;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,strand_bias,possible_numt;AS_SB_TABLE=1,2|596,2826|5,1;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14669.67,3.68 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:3,6:1.168e-03:3431:2,2:1,2:1,2,601,2827 +chrM 16183 . A C . PASS AS_FilterStatus=.;AS_SB_TABLE=57,40|175,1604;DP=3634;ECNT=8;MBQ=20,30;MFRL=389,397;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=4075.79 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./././.:97,1779:0.698:2656:26,572:31,1049:57,40,306,2253 +chrM 16183 . A ACCC . PASS AS_FilterStatus=.;AS_SB_TABLE=57,40|56,357;DP=3634;ECNT=8;MBQ=20,20;MFRL=389,396;MMQ=60,60;MPOS=-2147483648;OCM=0;POPAF=2.40;TLOD=494.17 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/././.:97,413:0.165:2656:26,52:31,278:57,40,306,2253 +chrM 16183 . A ACCCC . 
strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=57,40|5,241;DP=3634;ECNT=8;MBQ=20,20;MFRL=389,393;MMQ=60,60;MPOS=21;OCM=0;POPAF=2.40;TLOD=190.34 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/./.:97,246:0.101:2656:26,22:31,177:57,40,306,2253 +chrM 16183 . A ACCCCC . possible_numt;weak_evidence AS_FilterStatus=weak_evidence,possible_numt;AS_SB_TABLE=57,40|0,51;DP=3634;ECNT=8;MBQ=20,20;MFRL=389,377;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=2.16 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/.:97,51:5.638e-03:2656:26,2:31,36:57,40,306,2253 +chrM 16183 . A ACCCCCCCCCCCCCCCC . possible_numt AS_FilterStatus=possible_numt;AS_SB_TABLE=57,40|70,0;DP=3634;ECNT=8;MBQ=20,20;MFRL=389,402;MMQ=60,60;MPOS=-2147483648;OCM=0;POPAF=2.40;TLOD=10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1:97,70:8.474e-03:2656:26,19:31,19:57,40,306,2253 +chrM 16189 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=1,2|596,2826;DP=3540;ECNT=8;MBQ=30,30;MFRL=414,397;MMQ=60,60;MPOS=25;OCM=0;POPAF=2.40;TLOD=14669.67 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:3,3422:0.998:3431:2,1023:1,2014:1,2,601,2827 +chrM 16189 . T A . possible_numt;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,strand_bias,possible_numt;AS_SB_TABLE=1,2|5,1;DP=3540;ECNT=8;MBQ=30,25;MFRL=414,369;MMQ=60,60;MPOS=4;OCM=0;POPAF=2.40;TLOD=3.68 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:3,6:1.168e-03:3431:2,2:1,2:1,2,601,2827 chrM 16217 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=2,2|1114,3004;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14884.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 chrM 16519 . T C . 
PASS AS_FilterStatus=.;AS_SB_TABLE=3,3|4970,4301;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30665.46 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 From 8091e0d013d6c3b5c78bd47d8736d8168a17d326 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 5 Mar 2020 12:54:14 -0500 Subject: [PATCH 56/85] change count type for RPA --- .../hellbender/utils/variant/GATKVCFHeaderLines.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java index 004d5bf7eb7..cbfbafd0bc0 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java @@ -172,7 +172,7 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf addInfoLine(new VCFInfoHeaderLine(AS_STRAND_ODDS_RATIO_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele specific strand Odds Ratio of 2x|Alts| contingency table to detect allele specific strand bias")); addInfoLine(new VCFInfoHeaderLine(STR_PRESENT_KEY, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat")); addInfoLine(new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)")); - addInfoLine(new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)")); + addInfoLine(new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)")); addInfoLine(new VCFInfoHeaderLine(NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not 
necessarily genotyped) at this site")); addInfoLine(new VCFInfoHeaderLine(ORIGINAL_AC_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Original AC")); addInfoLine(new VCFInfoHeaderLine(ORIGINAL_AF_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Original AF")); From 30396adf4fa7175d5a5681ac49edfc7723a32fe3 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 5 Mar 2020 16:16:38 -0500 Subject: [PATCH 57/85] make NuMTFilter its own tool. update constants for MT low het tool --- .../mutect/filtering/AlleleFilterUtils.java | 53 +++++++++ .../ChimericOriginalAlignmentFilter.java | 42 ------- .../M2FiltersArgumentCollection.java | 17 +-- .../MTLowHeteroplasmyFilterTool.java | 12 +- .../filtering/MinAlleleFractionFilter.java | 2 +- .../mutect/filtering/Mutect2AlleleFilter.java | 2 +- .../filtering/Mutect2FilteringEngine.java | 5 +- .../walkers/mutect/filtering/NuMTFilter.java | 50 --------- .../mutect/filtering/NuMTFilterTool.java | 97 +++++++++++++++++ .../utils/variant/GATKVCFConstants.java | 2 +- .../mutect/Mutect2IntegrationTest.java | 103 +++++++++++++++--- .../MTLowHeteroplasmyFilterToolTest.java | 2 +- .../tools/mutect/mito/initialFiltered.vcf | 73 +++++++++++++ .../tools/mutect/mito/initialFiltered.vcf.idx | Bin 0 -> 279 bytes 14 files changed, 320 insertions(+), 140 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java delete mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java delete mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilterTool.java create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/initialFiltered.vcf create mode 100644 
src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/initialFiltered.vcf.idx diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java new file mode 100644 index 00000000000..44592209142 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java @@ -0,0 +1,53 @@ +package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; + +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFConstants; +import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation; +import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; +import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.ListIterator; +import java.util.stream.Collectors; + +public class AlleleFilterUtils { + + public static List> decodeASFilters(VariantContext vc) { + return AnnotationUtils.decodeAnyASListWithRawDelim(vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, "")).stream() + .map(filters -> AnnotationUtils.decodeAnyASList(filters).stream().map(String::trim).collect(Collectors.toList())) + .collect(Collectors.toList()); + } + + public static String encodeASFilters(List> filters) { + return AnnotationUtils.encodeAnyASListWithRawDelim(filters.stream().map(alleleFilters -> AnnotationUtils.encodeStringList(alleleFilters)).collect(Collectors.toList())); + } + + public static String getMergedASFilterString(VariantContext vc, List isFiltered, String filterName) { + List> alleleFilters = decodeASFilters(vc); + Utils.validateArg(isFiltered.size() == alleleFilters.size(), "lists are not the same size"); + ListIterator isFilteredIt = 
isFiltered.listIterator(); + + List> updatedFilters = alleleFilters.stream().map(filters -> { + Boolean filtered = isFilteredIt.next(); + if (filtered) { + return addFilter(filters, filterName); + } + else return filters; + }).collect(Collectors.toList()); + return encodeASFilters(updatedFilters); + } + + public static List addFilter(List currentFilters, String newFilter) { + if (currentFilters.size() == 1 && currentFilters.contains(VCFConstants.PASSES_FILTERS_v4)) { + return Collections.singletonList(newFilter); + } else { + List updated = new ArrayList<>(); + updated.addAll(currentFilters); + updated.add(newFilter); + return updated; + } + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java deleted file mode 100644 index 8367188d4fd..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ChimericOriginalAlignmentFilter.java +++ /dev/null @@ -1,42 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; - -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.Genotype; -import htsjdk.variant.variantcontext.VariantContext; -import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; - -import java.util.*; -import java.util.stream.Collectors; - -public class ChimericOriginalAlignmentFilter extends HardAlleleFilter { - private final double maxNuMTFraction; - - public ChimericOriginalAlignmentFilter(final double maxNuMTFraction) { - this.maxNuMTFraction = maxNuMTFraction; - } - - @Override - public ErrorType errorType() { return ErrorType.ARTIFACT; } - - public List getData(Genotype g) { - return Arrays.stream(g.getAD()).boxed().collect(Collectors.toList()); - } - - @Override - public List areAllelesArtifacts(final 
VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - final int nonMitochondrialOriginalAlignmentCount = vc.getAttributeAsInt(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, 0); - LinkedHashMap> dataByAllele = getDataByAllele(vc, Genotype::hasAD, this::getData, filteringEngine); - return dataByAllele.entrySet().stream() - .filter(entry -> !vc.getReference().equals(entry.getKey())) - .map(entry -> (double) nonMitochondrialOriginalAlignmentCount / entry.getValue().stream().mapToInt(Integer::intValue).sum() > maxNuMTFraction).collect(Collectors.toList()); - } - - @Override - public String filterName() { - return GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME; - } - - @Override - protected List requiredInfoAnnotations() { return Collections.singletonList(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY); } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java index 3c68a26d431..991693e6760 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/M2FiltersArgumentCollection.java @@ -37,8 +37,7 @@ public class M2FiltersArgumentCollection { public double initialPosteriorThreshold = DEFAULT_INITIAL_POSTERIOR_THRESHOLD; /** - * Mitochondria mode includes the filter{@link ChimericOriginalAlignmentFilter} and {@link NuMTFilter}, - * and excludes the filters {@link ClusteredEventsFilter}, {@link MultiallelicFilter}, {@link PolymeraseSlippageFilter}, + * Mitochondria mode excludes the filters {@link ClusteredEventsFilter}, {@link MultiallelicFilter}, {@link PolymeraseSlippageFilter}, * {@link FilteredHaplotypeFilter}, {@link FragmentLengthFilter}, and {@link GermlineFilter} */ @Argument(fullName = 
M2ArgumentCollection.MITOCHONDRIA_MODE_LONG_NAME, optional = true, doc = "Set filters to mitochondrial defaults") @@ -57,9 +56,6 @@ public class M2FiltersArgumentCollection { public static final String MIN_MEDIAN_READ_POSITION_LONG_NAME = "min-median-read-position"; public static final String MAX_N_RATIO_LONG_NAME = "max-n-ratio"; public static final String MIN_READS_ON_EACH_STRAND_LONG_NAME = "min-reads-per-strand"; - public static final String MAX_NUMT_FRACTION_LONG_NAME = "max-numt-fraction"; - public static final String MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME = "autosomal-coverage"; - public static final String MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME = "max-numt-autosomal-copies"; public static final String MIN_AF_LONG_NAME = "min-allele-fraction"; private static final int DEFAULT_MAX_EVENTS_IN_REGION = 2; @@ -72,8 +68,6 @@ public class M2FiltersArgumentCollection { private static final double DEFAULT_MAX_N_RATIO = Double.POSITIVE_INFINITY; private static final int DEFAULT_MIN_READS_ON_EACH_STRAND = 0; private static final double DEFAULT_MAX_NUMT_FRACTION = 0.85; - private static final double DEFAULT_MEDIAN_AUTOSOMAL_COVERAGE = 0; - private static final double DEFAULT_MAX_NUMT_AUTOSOMAL_COPIES = 4; private static final double DEFAULT_MIN_AF = 0; @Argument(fullName = MAX_EVENTS_IN_REGION_LONG_NAME, optional = true, doc = "Maximum events in a single assembly region. 
Filter all variants if exceeded.") @@ -103,15 +97,6 @@ public class M2FiltersArgumentCollection { @Argument(fullName = MIN_READS_ON_EACH_STRAND_LONG_NAME, optional = true, doc = "Minimum alt reads required on both forward and reverse strands") public int minReadsOnEachStrand = DEFAULT_MIN_READS_ON_EACH_STRAND; - @Argument(fullName = MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, optional = true, doc = "Median autosomal coverage for filtering potential NuMTs when calling on mitochondria.") - public double medianAutosomalCoverage = DEFAULT_MEDIAN_AUTOSOMAL_COVERAGE; - - @Argument(fullName = MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME, optional = true, doc = "Max expected NUMT copies in autosome used for filtering potential NuMTs when calling on mitochondria.") - public double maxNuMTAutosomalCopies = DEFAULT_MAX_NUMT_AUTOSOMAL_COPIES; - - @Argument(fullName = MAX_NUMT_FRACTION_LONG_NAME, doc="Maximum fraction of alt reads that originally aligned outside the mitochondria. These are due to NuMTs.", optional = true) - public double maxNuMTFraction = DEFAULT_MAX_NUMT_FRACTION; - @Argument(fullName = MIN_AF_LONG_NAME, doc="Minimum allele fraction required", optional = true) public double minAf = DEFAULT_MIN_AF; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java index e933b9f00bc..b095089731d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java @@ -12,13 +12,11 @@ import org.broadinstitute.hellbender.engine.ReadsContext; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.engine.TwoPassVariantWalker; -import org.broadinstitute.hellbender.utils.MathUtils; import 
org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; import picard.cmdline.programgroups.VariantFilteringProgramGroup; import java.io.File; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -31,7 +29,7 @@ ) public class MTLowHeteroplasmyFilterTool extends TwoPassVariantWalker { - public static final String MIN_LOW_HET_SITES_LONG_NAME = "min-low-het-sites"; + public static final String MAX_ALLOWED_LOW_HETS_LONG_NAME = "max-allowed-low-hets"; public static final String LOW_HET_THRESHOLD_LONG_NAME = "low-het-threshold"; @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, @@ -39,10 +37,10 @@ public class MTLowHeteroplasmyFilterTool extends TwoPassVariantWalker { doc = "Output VCF file") private String outputVcf = null; - @Argument(fullName = MIN_LOW_HET_SITES_LONG_NAME, - doc = "Number of low het sites allowed to pass other filters before filtering out all low het sites. Default is 5", + @Argument(fullName = MAX_ALLOWED_LOW_HETS_LONG_NAME, + doc = "Number of low het sites allowed to pass other filters before filtering out all low het sites. Default is 3", optional=true) - private int minLowHetSites = 3; + private int maxAllowedLowHets = 3; @Argument(fullName = LOW_HET_THRESHOLD_LONG_NAME, doc = "Threshold for determining a low heteroplasmy site. 
Default is 0.1", @@ -72,7 +70,7 @@ protected void firstPassApply(VariantContext variant, ReadsContext readsContext, @Override protected void afterFirstPass() { - failedLowHet = unfilteredLowHetSites > minLowHetSites; + failedLowHet = unfilteredLowHetSites > maxAllowedLowHets; } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java index 1dd385a6708..65797da1db5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MinAlleleFractionFilter.java @@ -28,7 +28,7 @@ public List getAltData(final Genotype g) { @Override public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - LinkedHashMap> dataByAllele = getAltDataByAllele(vc, g -> g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY), this::getAltData, filteringEngine); + LinkedHashMap> dataByAllele = getAltDataByAllele(vc, g -> g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY) && filteringEngine.isTumor(g), this::getAltData, filteringEngine); return dataByAllele.entrySet().stream() .filter(entry -> !vc.getReference().equals(entry.getKey())) .map(entry -> entry.getValue().stream().max(Double::compare).orElse(1.0) < minAf).collect(Collectors.toList()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index 43b2fb78c20..6c152404153 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -27,7 +27,7 @@ public static LinkedHashMap> getAltDataByAllele(final Varian private static LinkedHashMap> combineDataByAllele(final LinkedHashMap> dataByAllele, final VariantContext vc, Predicate preconditions, Function> getData, final Mutect2FilteringEngine filteringEngine) { // pull all the allele specific data out of each genotype (that passes preconditions) and add it to the list for the allele - vc.getGenotypes().stream().filter(preconditions).filter(filteringEngine::isTumor) + vc.getGenotypes().stream().filter(preconditions) .forEach(g -> { Iterator alleleDataIterator = getData.apply(g).iterator(); Iterator> dataByAlleleIterator = dataByAllele.values().iterator(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 62508ce7f8f..0f4fde205f8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -318,10 +318,7 @@ private void buildFiltersList(final M2FiltersArgumentCollection MTFAC) { filters.add(new ReadOrientationFilter(artifactTables)); } - if (MTFAC.mitochondria) { - filters.add(new ChimericOriginalAlignmentFilter(MTFAC.maxNuMTFraction)); - filters.add(new NuMTFilter(MTFAC.medianAutosomalCoverage, MTFAC.maxNuMTAutosomalCopies)); - } else { + if (!MTFAC.mitochondria) { filters.add(new ClusteredEventsFilter(MTFAC.maxEventsInRegion)); filters.add(new MultiallelicFilter(MTFAC.numAltAllelesThreshold)); filters.add(new FragmentLengthFilter(MTFAC.maxMedianFragmentLengthDifference)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java deleted file mode 100644 index 171a1de2d5e..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilter.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; - -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.Genotype; -import htsjdk.variant.variantcontext.VariantContext; -import org.apache.commons.math3.distribution.PoissonDistribution; -import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; - -import java.util.*; -import java.util.stream.Collectors; - - -public class NuMTFilter extends HardAlleleFilter { - private static final double LOWER_BOUND_PROB = .01; - private final int maxAltDepthCutoff; - - public NuMTFilter(final double medianAutosomalCoverage, final double maxNuMTCopies){ - if (maxNuMTCopies > 0 && medianAutosomalCoverage > 0) { - final PoissonDistribution autosomalCoverage = new PoissonDistribution(medianAutosomalCoverage * maxNuMTCopies / 2.0); - maxAltDepthCutoff = autosomalCoverage.inverseCumulativeProbability(1 - LOWER_BOUND_PROB); - } else { - maxAltDepthCutoff = 0; - } - } - - @Override - public ErrorType errorType() { return ErrorType.NON_SOMATIC; } - - public List getData(Genotype g) { - return Arrays.stream(g.getAD()).boxed().collect(Collectors.toList()); - } - - @Override - public List areAllelesArtifacts(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { - LinkedHashMap> dataByAllele = getDataByAllele(vc, Genotype::hasAD, this::getData, filteringEngine); - return dataByAllele.entrySet().stream() - .filter(entry -> !vc.getReference().equals(entry.getKey())) - .map(entry -> entry.getValue().stream().max(Integer::compare).orElse(0) < maxAltDepthCutoff).collect(Collectors.toList()); - } - - 
@Override - public String filterName() { - return GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME; - } - - @Override - protected List requiredInfoAnnotations() { return Collections.emptyList(); } - -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilterTool.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilterTool.java new file mode 100644 index 00000000000..1d43a606e28 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilterTool.java @@ -0,0 +1,97 @@ +package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.VCFHeader; +import org.apache.commons.math3.distribution.PoissonDistribution; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.ReadsContext; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.engine.VariantWalker; +import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; +import picard.cmdline.programgroups.VariantFilteringProgramGroup; + +import java.io.File; +import java.util.*; +import java.util.stream.Collectors; + +@CommandLineProgramProperties( + summary = "", + oneLineSummary = "", + programGroup = VariantFilteringProgramGroup.class +) + +public class NuMTFilterTool extends VariantWalker { + public static final String 
MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME = "autosomal-coverage"; + public static final String MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME = "max-numt-autosomal-copies"; + private static final double DEFAULT_MEDIAN_AUTOSOMAL_COVERAGE = 0; + private static final double DEFAULT_MAX_NUMT_AUTOSOMAL_COPIES = 4; + + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, + doc = "Output VCF file") + private String outputVcf = null; + + @Argument(fullName = MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, optional = true, doc = "Median autosomal coverage for filtering potential NuMTs when calling on mitochondria.") + public double medianAutosomalCoverage = DEFAULT_MEDIAN_AUTOSOMAL_COVERAGE; + + @Argument(fullName = MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME, optional = true, doc = "Max expected NUMT copies in autosome used for filtering potential NuMTs when calling on mitochondria.") + public double maxNuMTAutosomalCopies = DEFAULT_MAX_NUMT_AUTOSOMAL_COPIES; + + + private VariantContextWriter vcfWriter; + private static final double LOWER_BOUND_PROB = .01; + private int maxAltDepthCutoff = 0; + + + @Override + public void onTraversalStart() { + final VCFHeader header = getHeaderForVariants(); + header.addMetaDataLine(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME)); + vcfWriter = createVCFWriter(new File(outputVcf)); + vcfWriter.writeHeader(header); + if (maxNuMTAutosomalCopies > 0 && medianAutosomalCoverage > 0) { + final PoissonDistribution autosomalCoverage = new PoissonDistribution(medianAutosomalCoverage * maxNuMTAutosomalCopies / 2.0); + maxAltDepthCutoff = autosomalCoverage.inverseCumulativeProbability(1 - LOWER_BOUND_PROB); + } + } + + public List getData(Genotype g) { + return Arrays.stream(g.getAD()).boxed().collect(Collectors.toList()); + } + + @Override + public void apply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { 
+ VariantContextBuilder vcb = new VariantContextBuilder(variant); + LinkedHashMap> dataByAllele = Mutect2AlleleFilter.getDataByAllele(variant, Genotype::hasAD, this::getData, null); + List appliedFilter = dataByAllele.entrySet().stream() + .filter(entry -> !variant.getReference().equals(entry.getKey())) + .map(entry -> entry.getValue().stream().max(Integer::compare).orElse(0) < maxAltDepthCutoff).collect(Collectors.toList()); + if (!appliedFilter.contains(Boolean.FALSE)) { + vcb.filter(filterName()); + } + if (appliedFilter.contains(Boolean.TRUE)) { + vcb.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AlleleFilterUtils.getMergedASFilterString(variant, appliedFilter, filterName())); + } + vcfWriter.add(vcb.make()); + } + + @Override + public void closeTool() { + if ( vcfWriter != null ) { + vcfWriter.close(); + } + } + + public String filterName() { + return GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME; + } + +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java index b90ad63add8..cab9ef1ae96 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java @@ -164,7 +164,7 @@ their names (or descriptions) depend on some threshold. 
Those filters are not i public final static String CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME = "numt_chimera"; //mitochondria public final static String ALLELE_FRACTION_FILTER_NAME = "low_allele_frac"; public static final String POSSIBLE_NUMT_FILTER_NAME = "possible_numt"; - public static final String LOW_HET_FILTER_NAME = "mt_low_het"; + public static final String LOW_HET_FILTER_NAME = "mt_many_low_hets"; public static final String FAIL = "FAIL"; public static final List MUTECT_FILTER_NAMES = Arrays.asList(VCFConstants.PASSES_FILTERS_v4, POLYMERASE_SLIPPAGE, diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 92010f59af6..a547415b59d 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -21,6 +21,7 @@ import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReferenceConfidenceMode; import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.FilterMutectCalls; import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.M2FiltersArgumentCollection; +import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.NuMTFilterTool; import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.ReadOrientationFilter; import org.broadinstitute.hellbender.tools.walkers.readorientation.LearnReadOrientationModel; import org.broadinstitute.hellbender.tools.walkers.validation.Concordance; @@ -77,6 +78,8 @@ public class Mutect2IntegrationTest extends CommandLineProgramTest { private static final File NA12878_MITO_BAM = new File(toolsTestDir, "mutect/mito/NA12878.bam"); private static final File NA12878_MITO_VCF = new File(toolsTestDir, "mutect/mito/unfiltered-with-assb.vcf"); private static final File NA12878_MITO_GVCF = new 
File(toolsTestDir, "mitochondria/NA12878.MT.g.vcf"); + private static final File NA12878_MITO_INITIAL_FILTERED_VCF = new File(toolsTestDir, "mutect/mito/initialFiltered.vcf"); + private static final File NA12878_MITO_INITIAL_FILTERED_GVCF = new File(toolsTestDir, "mitochondria/initialFiltered.gvcf"); private static final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta"); private static final File DEEP_MITO_BAM = new File(largeFileTestDir, "mutect/highDPMTsnippet.bam"); private static final String DEEP_MITO_SAMPLE_NAME = "mixture"; @@ -511,11 +514,10 @@ public void testMitochondria() { @DataProvider(name = "vcfsForFiltering") public Object[][] vcfsForFiltering() { return new Object[][]{ - {NA12878_MITO_VCF, 0.5, 30, Collections.emptyList(), Arrays.asList( + {NA12878_MITO_VCF, 0.5, Collections.emptyList(), Arrays.asList( ImmutableSet.of(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), - ImmutableSet.of(GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), + Collections.emptySet(), ImmutableSet.of( GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, - GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME), Collections.emptySet(), Collections.emptySet(), @@ -523,34 +525,33 @@ public Object[][] vcfsForFiltering() { ImmutableSet.of(GATKVCFConstants.FAIL)), Arrays.asList( Arrays.asList(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // strand_bias, strict_stand - Arrays.asList(GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME), // numt_chimera - Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // weak_evidence, low_allele_frac, possible_numt - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + 
GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, strand_bias, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME), // weak_evidence, low_allele_frac + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME), // .|weak_evidence, strand_bias, low_allele_frac|strand_bias, strict_strand, low_allele_frac Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), // . 
Arrays.asList(GATKVCFConstants.DUPLICATED_EVIDENCE_FILTER_NAME), // duplicate - Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME) // weak_evidence, strand_bias, strict_stand|low_allele_frac, possible_numt + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME) // weak_evidence, strand_bias, strict_stand|low_allele_frac )}, - {NA12878_MITO_GVCF, .0009, 0.5, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList( + {NA12878_MITO_GVCF, .0009, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList( Collections.emptySet(), ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME), - ImmutableSet.of(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME,GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), + ImmutableSet.of(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME,GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), Collections.emptySet(), - ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.CONTAMINATION_FILTER_NAME, - GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, + ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.CONTAMINATION_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME, 
GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME)), Arrays.asList( Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), //".", Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //"weak_evidence, base_qual, strand_bias|.", - Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // "weak_evidence, strict_strand, strand_bias, possible_numt|.", + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // "weak_evidence, strict_strand, strand_bias|.", Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual, strand_bias, low_allele_frac|.", - Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // "weak_evidence, base_qual, map_qual, contamination, strand_artifact, position, low_allele_frac, possible_numt|." 
+ Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // "weak_evidence, base_qual, map_qual, contamination, strand_artifact, position, low_allele_frac|." )} }; } @Test(dataProvider = "vcfsForFiltering") - public void testFilterMitochondria(File unfiltered, final double minAlleleFraction, final double autosomalCoverage, final List intervals, List> expectedFilters, List> expectedASFilters) { + public void testFilterMitochondria(File unfiltered, final double minAlleleFraction, final List intervals, List> expectedFilters, List> expectedASFilters) { final File filteredVcf = createTempFile("filtered", ".vcf"); // vcf sequence dicts don't match ref @@ -558,8 +559,6 @@ public void testFilterMitochondria(File unfiltered, final double minAlleleFracti args -> args.add(M2ArgumentCollection.MITOCHONDRIA_MODE_LONG_NAME, true), args -> args.add(StandardArgumentDefinitions.DISABLE_SEQUENCE_DICT_VALIDATION_NAME, true), args -> args.add(M2FiltersArgumentCollection.MIN_AF_LONG_NAME, minAlleleFraction), - args -> args.add(M2FiltersArgumentCollection.MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, autosomalCoverage), - args -> args.add(M2FiltersArgumentCollection.MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME, 4.0), args -> args.add(M2FiltersArgumentCollection.MIN_READS_ON_EACH_STRAND_LONG_NAME, 1), args -> args.add(M2FiltersArgumentCollection.UNIQUE_ALT_READ_COUNT_LONG_NAME, 2), args -> { @@ -567,7 +566,77 @@ public void testFilterMitochondria(File unfiltered, final double minAlleleFracti return args; }); - // add tests for DUPLICATE + final List> actualFilters = VariantContextTestUtils.streamVcf(filteredVcf) + 
.map(VariantContext::getFilters).collect(Collectors.toList()); + + final List> actualASFilters = VariantContextTestUtils.streamVcf(filteredVcf) + .map(vc -> AnnotationUtils.decodeAnyASListWithRawDelim(vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, ""))).collect(Collectors.toList()); + Assert.assertEquals(actualASFilters, expectedASFilters); + + Assert.assertEquals(actualFilters.size(), expectedFilters.size()); + for (int n = 0; n < actualFilters.size(); n++) { + Assert.assertTrue(actualFilters.get(n).containsAll(expectedFilters.get(n)), "Actual filters missing some expected filters: " + SetUtils.difference(expectedFilters.get(n), actualFilters.get(n))); + Assert.assertTrue(expectedFilters.get(n).containsAll(actualFilters.get(n)), "Expected filters missing some actual filters: " + SetUtils.difference(actualFilters.get(n), expectedFilters.get(n))); + } + + Assert.assertEquals(actualFilters, expectedFilters); + } + + @DataProvider(name = "vcfsForNuMTFiltering") + public Object[][] vcfsForNuMTFiltering() { + return new Object[][]{ + {NA12878_MITO_INITIAL_FILTERED_VCF, 30, Collections.emptyList(), Arrays.asList( + ImmutableSet.of(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), + Collections.emptySet(), + ImmutableSet.of( GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME), + Collections.emptySet(), + Collections.emptySet(), + ImmutableSet.of(GATKVCFConstants.DUPLICATED_EVIDENCE_FILTER_NAME), + ImmutableSet.of(GATKVCFConstants.FAIL)), + Arrays.asList( + Arrays.asList(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // strand_bias, strict_stand + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), // . 
+ Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // weak_evidence, low_allele_frac, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, strand_bias, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt + Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), // . + Arrays.asList(GATKVCFConstants.DUPLICATED_EVIDENCE_FILTER_NAME), // duplicate + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME) // weak_evidence, strand_bias, strict_stand|low_allele_frac, possible_numt + + )} +// {NA12878_MITO_GVCF, 0.5, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList( +// Collections.emptySet(), +// Collections.emptySet(), +// Collections.emptySet(), +// Collections.emptySet(), +// ImmutableSet.of(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME)), +// Arrays.asList( +// Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), //".", +// Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.EMPTY_INFO_FIELD), //"weak_evidence, base_qual, strand_bias|.", +// Arrays.asList(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // "possible_numt|.", +// Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.EMPTY_INFO_FIELD, VCFConstants.EMPTY_INFO_FIELD), 
//".|weak_evidence, base_qual, strand_bias, low_allele_frac|.", +// Arrays.asList(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // "possible_numt|." +// )} + }; + } + + @Test(dataProvider = "vcfsForNuMTFiltering") + public void testNuMTFilterMitochondria(File initialFilters, final double autosomalCoverage, final List intervals, List> expectedFilters, List> expectedASFilters) { + final File filteredVcf = createTempFile("filtered", ".vcf"); + + final ArgumentsBuilder args = new ArgumentsBuilder() + .addVCF(initialFilters) + .addOutput(filteredVcf) + .addReference(MITO_REF.getAbsolutePath()) + .add(NuMTFilterTool.MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, autosomalCoverage) + .add(NuMTFilterTool.MAX_NUMT_COPIES_IN_AUTOSOME_LONG_NAME, 4.0); + + intervals.stream().map(SimpleInterval::new).forEach(args::addInterval); + + // vcf sequence dicts don't match ref + runCommandLine(args, NuMTFilterTool.class.getSimpleName()); + final List> actualFilters = VariantContextTestUtils.streamVcf(filteredVcf) .map(VariantContext::getFilters).collect(Collectors.toList()); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java index 746516e93db..0d8f9ba1a05 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java @@ -24,7 +24,7 @@ public void testLowHetVariantsFiltered() { final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() .addReference(MITO_REF.getAbsolutePath()) .add(StandardArgumentDefinitions.VARIANT_SHORT_NAME, NA12878_MITO_FILTERED_VCF.getAbsolutePath()) - .add(MTLowHeteroplasmyFilterTool.MIN_LOW_HET_SITES_LONG_NAME, 0) + .add(MTLowHeteroplasmyFilterTool.MAX_ALLOWED_LOW_HETS_LONG_NAME, 
0) .addOutput(outputFile); runCommandLine(argsBuilder); Set variants = VariantContextTestUtils.streamVcf(outputFile) diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/initialFiltered.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/initialFiltered.vcf new file mode 100644 index 00000000000..fa949fcec29 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/initialFiltered.vcf @@ -0,0 +1,73 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##Mutect Version=2.1 +##contig= +##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. +##source=FilterMutectCalls +##source=Mutect2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chrM 152 . T C . strand_bias;strict_strand AS_FilterStatus=strand_bias,strict_strand;AS_SB_TABLE=0,3|0,1556;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +chrM 263 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=1,0|431,400;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +chrM 301 . A AC . 
low_allele_frac;weak_evidence AS_FilterStatus=weak_evidence,low_allele_frac;AS_SB_TABLE=500,149|41,12;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +chrM 302 . A AC,C,ACC . PASS AS_FilterStatus=.|weak_evidence,strand_bias,low_allele_frac|strand_bias,strict_strand,low_allele_frac;AS_SB_TABLE=0,5|51,350|60,7|49,0;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 310 . T TC . PASS AS_FilterStatus=.;AS_SB_TABLE=0,0|300,358;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 750 . A G . duplicate AS_FilterStatus=duplicate;AS_SB_TABLE=0,1|100,1424;AS_UNIQ_ALT_READ_COUNT=2;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true +chrM 802 . A C,ACC . 
FAIL AS_FilterStatus=weak_evidence,strand_bias,strict_strand|low_allele_frac;AS_SB_TABLE=55,50|0,301|30,37;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08;TLOD=10.66,891.23 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2:105,301,67:0.636,0.141:2,163,35:3,238,32:20,20,30:419,316,340:60,60,60:41,33 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/initialFiltered.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/initialFiltered.vcf.idx new file mode 100644 index 0000000000000000000000000000000000000000..2106fb9e1e834324fe2187f32a2950733577fc7d GIT binary patch literal 279 zcmZ8bTMEK35L^-Q7=9`UCNXV%{FOFpK`Uq!|ALQJt%@3@sAuvpp2BmuE%e8MU6`Gn zomIZKC;&haE7~8!E+HgY8WNq4o6y`yhZB9ECbpgr8uQiZ5Vc=hx{DKSQN;F26hYrT zG~q_rj5m3UGQlm|wy5B&k*p)QRq;5YwyeJ|X;_L9mgl>wyWW`*Fky=h^rWL^6RC$X z=&OMqNhbi-p#Sr!^0X)CKv7C}bncrw6W Date: Mon, 9 Mar 2020 13:35:41 -0400 Subject: [PATCH 58/85] change info field count type back to 1, since format is non-standard --- .../hellbender/utils/variant/GATKVCFHeaderLines.java | 2 +- .../hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java index cbfbafd0bc0..eede81b5556 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java @@ -130,7 +130,7 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf addInfoLine(new VCFInfoHeaderLine(CLIPPING_RANK_SUM_KEY, 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. 
Ref number of hard clipped bases")); addInfoLine(new VCFInfoHeaderLine(FISHER_STRAND_KEY, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias")); addInfoLine(new VCFInfoHeaderLine(AS_FISHER_STRAND_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "allele specific phred-scaled p-value using Fisher's exact test to detect strand bias of each alt allele")); - addInfoLine(new VCFInfoHeaderLine(AS_SB_TABLE_KEY, VCFHeaderLineCount.R, VCFHeaderLineType.String, "Allele-specific forward/reverse read counts for strand bias tests")); + addInfoLine(new VCFInfoHeaderLine(AS_SB_TABLE_KEY, 1, VCFHeaderLineType.String, "Allele-specific forward/reverse read counts for strand bias tests. Includes the reference and alleles separated by |.")); addInfoLine(new VCFInfoHeaderLine(NOCALL_CHROM_KEY, 1, VCFHeaderLineType.Integer, "Number of no-called samples")); addInfoLine(new VCFInfoHeaderLine(GQ_MEAN_KEY, 1, VCFHeaderLineType.Float, "Mean of all GQ values")); addInfoLine(new VCFInfoHeaderLine(GQ_STDEV_KEY, 1, VCFHeaderLineType.Float, "Standard deviation of all GQ values")); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index a547415b59d..29702c8720d 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -79,7 +79,6 @@ public class Mutect2IntegrationTest extends CommandLineProgramTest { private static final File NA12878_MITO_VCF = new File(toolsTestDir, "mutect/mito/unfiltered-with-assb.vcf"); private static final File NA12878_MITO_GVCF = new File(toolsTestDir, "mitochondria/NA12878.MT.g.vcf"); private static final File NA12878_MITO_INITIAL_FILTERED_VCF = new File(toolsTestDir, "mutect/mito/initialFiltered.vcf"); - private static final File 
NA12878_MITO_INITIAL_FILTERED_GVCF = new File(toolsTestDir, "mitochondria/initialFiltered.gvcf"); private static final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta"); private static final File DEEP_MITO_BAM = new File(largeFileTestDir, "mutect/highDPMTsnippet.bam"); private static final String DEEP_MITO_SAMPLE_NAME = "mixture"; From 39d04a7ee10206d713963b09933a3d13237e575e Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 9 Mar 2020 13:47:38 -0400 Subject: [PATCH 59/85] fix test --- .../expected.testGVCFMode.gatk4.alleleSpecific.g.vcf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGVCFMode.gatk4.alleleSpecific.g.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGVCFMode.gatk4.alleleSpecific.g.vcf index 8ab01b3bcc3..b8d3da06b46 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGVCFMode.gatk4.alleleSpecific.g.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGVCFMode.gatk4.alleleSpecific.g.vcf @@ -82,7 +82,7 @@ ##INFO= ##INFO= ##INFO= -##INFO= +##INFO= ##INFO= ##INFO= ##INFO= From 8205932da124b265baea203848e53ba8b78565d5 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 9 Mar 2020 14:23:30 -0400 Subject: [PATCH 60/85] change allele specific filter status of . 
to SITE --- .../mutect/filtering/AlleleFilterUtils.java | 2 +- .../filtering/Mutect2FilteringEngine.java | 22 +++++++------ .../utils/variant/GATKVCFConstants.java | 2 ++ .../mutect/Mutect2IntegrationTest.java | 32 +++++++++---------- .../tools/mutect/mito/initialFiltered.vcf | 6 ++-- 5 files changed, 34 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java index 44592209142..529cec962c6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java @@ -41,7 +41,7 @@ public static String getMergedASFilterString(VariantContext vc, List is } public static List addFilter(List currentFilters, String newFilter) { - if (currentFilters.size() == 1 && currentFilters.contains(VCFConstants.PASSES_FILTERS_v4)) { + if (currentFilters.size() == 1 && currentFilters.contains(GATKVCFConstants.SITE_LEVEL_FILTERS)) { return Collections.singletonList(newFilter); } else { List updated = new ArrayList<>(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 0f4fde205f8..8d62c7df907 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -197,7 +197,7 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext ListIterator mergedFilterStringByAllele = distinctFiltersByAllele.stream().map(AnnotationUtils::encodeStringList).collect(Collectors.toList()).listIterator(); List 
orderedASFilterStrings = vc.getAlternateAlleles().stream().map(allele -> allele.isSymbolic() ? - VCFConstants.EMPTY_INFO_FIELD : mergedFilterStringByAllele.next()).collect(Collectors.toList()); + GATKVCFConstants.SITE_LEVEL_FILTERS : mergedFilterStringByAllele.next()).collect(Collectors.toList()); String finalAttrString = AnnotationUtils.encodeAnyASListWithRawDelim(orderedASFilterStrings); vcb.putAttributes(Collections.singletonMap(GATKVCFConstants.AS_FILTER_STATUS_KEY, finalAttrString)); @@ -206,7 +206,7 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext // compute site-only filters // from allele specific filters alleleStatusByFilter.stream().forEachOrdered(alleleStatusForFilter -> { - if (!alleleStatusForFilter.isEmpty() && alleleStatusForFilter.stream().distinct().count() == 1 && !alleleStatusForFilter.contains(VCFConstants.PASSES_FILTERS_v4)) { + if (!alleleStatusForFilter.isEmpty() && alleleStatusForFilter.stream().distinct().count() == 1 && !alleleStatusForFilter.contains(GATKVCFConstants.SITE_LEVEL_FILTERS)) { siteFiltersWithErrorProb.put(alleleStatusForFilter.get(0), 1.0); } }); @@ -231,7 +231,7 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext if (siteFiltersWithErrorProb.isEmpty() && !distinctFiltersByAllele.stream().allMatch(List::isEmpty)) { List> filtersNonSymbolicAlleles = GATKVariantContextUtils.removeDataForSymbolicAltAlleles(vc, distinctFiltersByAllele); // if any allele passed, don't fail the site - if (!filtersNonSymbolicAlleles.stream().anyMatch(filterList -> filterList.contains(VCFConstants.EMPTY_INFO_FIELD))) { + if (!filtersNonSymbolicAlleles.stream().anyMatch(filterList -> filterList.contains(GATKVCFConstants.SITE_LEVEL_FILTERS))) { // we know the allele level filters exceeded their threshold - so set this prob to 1 siteFiltersWithErrorProb.put(GATKVCFConstants.FAIL, 1.0); } @@ -250,30 +250,32 @@ public VariantContext applyFiltersAndAccumulateOutputStats(final VariantContext } 
/** - * Creates a list of the string names of all the filters that apply to the allele, or the string . if it passed all filters + * Creates a list of the string names of all the filters that apply to the allele, or the string "SITE" if it passed all allele filters * @param filtersForAllele all the filters applied to the allele - * @return list of filter names that apply to the allele or . + * @return list of filter names that apply to the allele or SITE */ private List getDistinctFiltersForAllele(final List filtersForAllele) { final List results = filtersForAllele.stream().distinct().collect(Collectors.toList()); - results.remove(VCFConstants.PASSES_FILTERS_v4); + if (results.size() > 1 && results.contains(GATKVCFConstants.SITE_LEVEL_FILTERS)) { + results.remove(GATKVCFConstants.SITE_LEVEL_FILTERS); + } if (results.isEmpty()) { - results.add(VCFConstants.EMPTY_INFO_FIELD); + results.add(GATKVCFConstants.SITE_LEVEL_FILTERS); } return results; } /** * For each allele, determine whether the filter should be applied and return either the - * filter name or PASS. We use PASS as a place holder because the results are per alt allele. + * filter name or SITE. We use SITE as a place holder because the results are per alt allele. * @param probabilities the probabilities computed by the filter for the alleles * @param errorThreshold the theshold to use to determine whether filter applies * @param filterName the name of the filter being evaluated - * @return List of filtername or "PASS" for each allele + * @return List of filtername or "SITE" for each allele */ private List addFilterStrings(final List probabilities, final double errorThreshold, final String filterName) { return probabilities.stream().map(value -> value > errorThreshold ? 
- filterName : VCFConstants.PASSES_FILTERS_v4).collect(Collectors.toList()); + filterName : GATKVCFConstants.SITE_LEVEL_FILTERS).collect(Collectors.toList()); } public static double roundFinitePrecisionErrors(final double probability) { diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java index cab9ef1ae96..a0d4b950920 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java @@ -166,6 +166,8 @@ their names (or descriptions) depend on some threshold. Those filters are not i public static final String POSSIBLE_NUMT_FILTER_NAME = "possible_numt"; public static final String LOW_HET_FILTER_NAME = "mt_many_low_hets"; public static final String FAIL = "FAIL"; + public static final String SITE_LEVEL_FILTERS = "SITE"; + public static final List MUTECT_FILTER_NAMES = Arrays.asList(VCFConstants.PASSES_FILTERS_v4, POLYMERASE_SLIPPAGE, PON_FILTER_NAME, CLUSTERED_EVENTS_FILTER_NAME, TUMOR_EVIDENCE_FILTER_NAME, GERMLINE_RISK_FILTER_NAME, diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 29702c8720d..91530b33e77 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -524,10 +524,10 @@ public Object[][] vcfsForFiltering() { ImmutableSet.of(GATKVCFConstants.FAIL)), Arrays.asList( Arrays.asList(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // strand_bias, strict_stand - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), + Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS), // SITE 
Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME), // weak_evidence, low_allele_frac - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME), // .|weak_evidence, strand_bias, low_allele_frac|strand_bias, strict_strand, low_allele_frac - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), // . + Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME), // SITE|weak_evidence, strand_bias, low_allele_frac|strand_bias, strict_strand, low_allele_frac + Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS), // SITE Arrays.asList(GATKVCFConstants.DUPLICATED_EVIDENCE_FILTER_NAME), // duplicate Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME) // weak_evidence, strand_bias, strict_stand|low_allele_frac @@ -540,11 +540,11 @@ public Object[][] vcfsForFiltering() { ImmutableSet.of(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME, GATKVCFConstants.CONTAMINATION_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME, GATKVCFConstants.READ_POSITION_FILTER_NAME, GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME, GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME)), Arrays.asList( - 
Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), //".", - Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //"weak_evidence, base_qual, strand_bias|.", - Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // "weak_evidence, strict_strand, strand_bias|.", - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual, strand_bias, low_allele_frac|.", - Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // "weak_evidence, base_qual, map_qual, contamination, strand_artifact, position, low_allele_frac|." 
+ Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS), // SITE, + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME, GATKVCFConstants.SITE_LEVEL_FILTERS), //"weak_evidence, base_qual, strand_bias|SITE", + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, GATKVCFConstants.SITE_LEVEL_FILTERS), // "weak_evidence, strict_strand, strand_bias|SITE", + Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.SITE_LEVEL_FILTERS), //"SITE|weak_evidence, base_qual, strand_bias, low_allele_frac|SITE", + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.MEDIAN_MAPPING_QUALITY_FILTER_NAME + ", " + GATKVCFConstants.CONTAMINATION_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.READ_POSITION_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME, GATKVCFConstants.SITE_LEVEL_FILTERS) // "weak_evidence, base_qual, map_qual, contamination, strand_artifact, position, low_allele_frac|SITE" )} }; } @@ -596,10 +596,10 @@ public Object[][] vcfsForNuMTFiltering() { ImmutableSet.of(GATKVCFConstants.FAIL)), Arrays.asList( Arrays.asList(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME), // strand_bias, strict_stand - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), // . 
+ Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS), // SITE Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // weak_evidence, low_allele_frac, possible_numt - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // .|weak_evidence, strand_bias, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt - Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), // . + Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME), // SITE|weak_evidence, strand_bias, low_allele_frac, possible_numt|strand_bias, strict_strand, low_allele_frac, possible_numt + Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS), // SITE Arrays.asList(GATKVCFConstants.DUPLICATED_EVIDENCE_FILTER_NAME), // duplicate Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME) // weak_evidence, strand_bias, strict_stand|low_allele_frac, possible_numt @@ 
-611,11 +611,11 @@ public Object[][] vcfsForNuMTFiltering() { // Collections.emptySet(), // ImmutableSet.of(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME)), // Arrays.asList( -// Arrays.asList(VCFConstants.EMPTY_INFO_FIELD), //".", -// Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.EMPTY_INFO_FIELD), //"weak_evidence, base_qual, strand_bias|.", -// Arrays.asList(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD), // "possible_numt|.", -// Arrays.asList(VCFConstants.EMPTY_INFO_FIELD, VCFConstants.EMPTY_INFO_FIELD, VCFConstants.EMPTY_INFO_FIELD), //".|weak_evidence, base_qual, strand_bias, low_allele_frac|.", -// Arrays.asList(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, VCFConstants.EMPTY_INFO_FIELD) // "possible_numt|." +// Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS), //".", +// Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS, GATKVCFConstants.SITE_LEVEL_FILTERS), //"weak_evidence, base_qual, strand_bias|.", +// Arrays.asList(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.SITE_LEVEL_FILTERS), // "possible_numt|.", +// Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS, GATKVCFConstants.SITE_LEVEL_FILTERS, GATKVCFConstants.SITE_LEVEL_FILTERS), //".|weak_evidence, base_qual, strand_bias, low_allele_frac|.", +// Arrays.asList(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.SITE_LEVEL_FILTERS) // "possible_numt|." // )} }; } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/initialFiltered.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/initialFiltered.vcf index fa949fcec29..0eefc1e3f6b 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/initialFiltered.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/initialFiltered.vcf @@ -65,9 +65,9 @@ ##source=Mutect2 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 chrM 152 . T C . 
strand_bias;strict_strand AS_FilterStatus=strand_bias,strict_strand;AS_SB_TABLE=0,3|0,1556;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true -chrM 263 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=1,0|431,400;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +chrM 263 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=1,0|431,400;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 chrM 301 . A AC . low_allele_frac;weak_evidence AS_FilterStatus=weak_evidence,low_allele_frac;AS_SB_TABLE=500,149|41,12;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 -chrM 302 . A AC,C,ACC . PASS AS_FilterStatus=.|weak_evidence,strand_bias,low_allele_frac|strand_bias,strict_strand,low_allele_frac;AS_SB_TABLE=0,5|51,350|60,7|49,0;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 -chrM 310 . T TC . PASS AS_FilterStatus=.;AS_SB_TABLE=0,0|300,358;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 302 . A AC,C,ACC . PASS AS_FilterStatus=SITE|weak_evidence,strand_bias,low_allele_frac|strand_bias,strict_strand,low_allele_frac;AS_SB_TABLE=0,5|51,350|60,7|49,0;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 310 . T TC . 
PASS AS_FilterStatus=SITE;AS_SB_TABLE=0,0|300,358;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 chrM 750 . A G . duplicate AS_FilterStatus=duplicate;AS_SB_TABLE=0,1|100,1424;AS_UNIQ_ALT_READ_COUNT=2;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true chrM 802 . A C,ACC . FAIL AS_FilterStatus=weak_evidence,strand_bias,strict_strand|low_allele_frac;AS_SB_TABLE=55,50|0,301|30,37;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08;TLOD=10.66,891.23 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2:105,301,67:0.636,0.141:2,163,35:3,238,32:20,20,30:419,316,340:60,60,60:41,33 From 54bd5db4e986a07e3e292ec2a8458d033daab1f8 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 9 Mar 2020 16:13:07 -0400 Subject: [PATCH 61/85] more output for failing test --- .../hellbender/utils/variant/GATKVCFConstants.java | 3 +-- .../hellbender/utils/variant/GATKVCFHeaderLines.java | 2 +- .../mutect/filtering/MTLowHeteroplasmyFilterToolTest.java | 5 +++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java index a0d4b950920..601c3c16423 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java @@ -161,7 +161,6 @@ their names (or descriptions) depend on some threshold. 
Those filters are not i public final static String BAD_HAPLOTYPE_FILTER_NAME = "haplotype"; public final static String STRICT_STRAND_BIAS_FILTER_NAME = "strict_strand"; public final static String N_RATIO_FILTER_NAME = "n_ratio"; - public final static String CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME = "numt_chimera"; //mitochondria public final static String ALLELE_FRACTION_FILTER_NAME = "low_allele_frac"; public static final String POSSIBLE_NUMT_FILTER_NAME = "possible_numt"; public static final String LOW_HET_FILTER_NAME = "mt_many_low_hets"; @@ -175,7 +174,7 @@ their names (or descriptions) depend on some threshold. Those filters are not i MEDIAN_BASE_QUALITY_FILTER_NAME, MEDIAN_MAPPING_QUALITY_FILTER_NAME, MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME, READ_POSITION_FILTER_NAME, CONTAMINATION_FILTER_NAME, DUPLICATED_EVIDENCE_FILTER_NAME, - READ_ORIENTATION_ARTIFACT_FILTER_NAME, BAD_HAPLOTYPE_FILTER_NAME, CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME, + READ_ORIENTATION_ARTIFACT_FILTER_NAME, BAD_HAPLOTYPE_FILTER_NAME, STRICT_STRAND_BIAS_FILTER_NAME, N_RATIO_FILTER_NAME, ALLELE_FRACTION_FILTER_NAME, POSSIBLE_NUMT_FILTER_NAME, FAIL); public static final List MUTECT_AS_FILTER_NAMES = Arrays.asList(AS_FILTER_STATUS_KEY); diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java index eede81b5556..3ae4407fa32 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java @@ -97,10 +97,10 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf addFilterLine(new VCFFilterHeaderLine(ALLELE_FRACTION_FILTER_NAME, "Allele fraction is below specified threshold")); //Mitochondrial M2-related filters - addFilterLine(new VCFFilterHeaderLine(CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME, "NuMT variant with too many ALT reads 
originally from autosome")); addFilterLine(new VCFFilterHeaderLine(POSSIBLE_NUMT_FILTER_NAME, "Allele depth is below expected coverage of NuMT in autosome")); addFilterLine(new VCFFilterHeaderLine(LOW_HET_FILTER_NAME, "All low heteroplasmy sites are filtered when at least x low het sites pass all other filters")); addFilterLine(new VCFFilterHeaderLine(FAIL, "Fail the site if all alleles fail but for different reasons.")); + addFilterLine(new VCFFilterHeaderLine(SITE_LEVEL_FILTERS, "There are no allele specific filters that apply to this allele. Only site level filters apply.")); addFilterLine(new VCFFilterHeaderLine(LOW_HET_FILTER_NAME, "All low heteroplasmy sites are filtered when at least x low het sites pass all other filters")); addFormatLine(new VCFFormatHeaderLine(ALLELE_BALANCE_KEY, 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java index 0d8f9ba1a05..2dda560cd02 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java @@ -19,7 +19,7 @@ public class MTLowHeteroplasmyFilterToolTest extends CommandLineProgramTest { @Test public void testLowHetVariantsFiltered() { - final Set low_het_sites = new HashSet<>(Arrays.asList(301, 302)); + final Set low_het_sites = new HashSet<>(Arrays.asList(302, 301)); final File outputFile = createTempFile("low-het-test", ".vcf"); final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() .addReference(MITO_REF.getAbsolutePath()) @@ -29,7 +29,8 @@ public void testLowHetVariantsFiltered() { runCommandLine(argsBuilder); Set variants = VariantContextTestUtils.streamVcf(outputFile) .filter(vcf -> 
vcf.getFilters().contains(GATKVCFConstants.LOW_HET_FILTER_NAME)).collect(Collectors.toSet()); - Assert.assertEquals(variants.stream().map(var -> var.getStart()).collect(Collectors.toList()), low_het_sites, "exprected these sites to have " + GATKVCFConstants.LOW_HET_FILTER_NAME + " filter."); + Set actual_sites = variants.stream().map(var -> var.getStart()).collect(Collectors.toSet()); + Assert.assertEquals(actual_sites, low_het_sites, "exprected these sites " + actual_sites + " to have " + GATKVCFConstants.LOW_HET_FILTER_NAME + " filter."); } @Test From 6b496b85b59e14ba98a6736509e7d996e4a1d739 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 12 Mar 2020 16:05:34 -0400 Subject: [PATCH 62/85] make MT filter low het tool allele specific --- .../MTLowHeteroplasmyFilterTool.java | 35 ++++++++++++++++--- .../MTLowHeteroplasmyFilterToolTest.java | 30 +++++++++++++--- .../hellbender/tools/mutect/mito/filtered.vcf | 10 +++--- 3 files changed, 61 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java index b095089731d..aeb11df1599 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; +import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; @@ -19,6 +20,7 @@ import java.io.File; import java.util.Arrays; import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; import java.util.stream.Collectors; @@ -63,7 +65,7 @@ public void 
onTraversalStart() { @Override protected void firstPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { // if the site is not filtered but it is low het, increment counter - if (variant.isNotFiltered() && isLowHeteroplasmy(variant)) { + if (variant.isNotFiltered() && isSiteLowHeteroplasmy(variant)) { unfilteredLowHetSites++; } } @@ -76,8 +78,14 @@ protected void afterFirstPass() { @Override protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { VariantContextBuilder vcb = new VariantContextBuilder(variant); - if (failedLowHet && isLowHeteroplasmy(variant)) { - vcb.filter(GATKVCFConstants.LOW_HET_FILTER_NAME); + if (failedLowHet) { + List appliedFilter = areAllelesArtifacts(variant); + if (!appliedFilter.contains(Boolean.FALSE)) { + vcb.filter(filterName()); + } + if (appliedFilter.contains(Boolean.TRUE)) { + vcb.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AlleleFilterUtils.getMergedASFilterString(variant, appliedFilter, filterName())); + } } vcfWriter.add(vcb.make()); } @@ -89,12 +97,29 @@ public void closeTool() { } } - protected boolean isLowHeteroplasmy(VariantContext v) { + protected String filterName() { + return GATKVCFConstants.LOW_HET_FILTER_NAME; + } + + public List getData(Genotype g) { + return Arrays.stream(g.getAD()).boxed().collect(Collectors.toList()); + } + + protected boolean isSiteLowHeteroplasmy(VariantContext v) { return v.getGenotypes().stream().map(g -> lowestAF(g)).min(Double::compareTo).orElse(0.0) < lowHetThreshold; } + protected List areAllelesArtifacts(final VariantContext vc) { + VariantContextBuilder vcb = new VariantContextBuilder(vc); + LinkedHashMap> dataByAllele = Mutect2AlleleFilter.getDataByAllele(vc, Genotype::hasAD, this::getData, null); + Integer total = dataByAllele.values().stream().map(alleleCounts -> 
alleleCounts.stream().max(Integer::compareTo).orElse(0)).mapToInt(Integer::intValue).sum(); + return dataByAllele.entrySet().stream() + .filter(entry -> !vc.getReference().equals(entry.getKey())) + .map(entry -> (entry.getValue().stream().max(Integer::compareTo).orElse(0) / (double) total) < lowHetThreshold).collect(Collectors.toList()); + } + protected double lowestAF(Genotype g) { - List depths = Arrays.stream(g.getAD()).boxed().collect(Collectors.toList()); + List depths = getData(g); return Collections.min(depths.subList(1, depths.size())) / (double) depths.stream().mapToInt(Integer::intValue).sum(); } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java index 2dda560cd02..ff90073638d 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterToolTest.java @@ -5,8 +5,10 @@ import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; +import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -17,9 +19,24 @@ public class MTLowHeteroplasmyFilterToolTest extends CommandLineProgramTest { private static final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta"); private static final File NA12878_MITO_FILTERED_VCF = new File(toolsTestDir, "mutect/mito/filtered.vcf"); - @Test - public void 
testLowHetVariantsFiltered() { - final Set low_het_sites = new HashSet<>(Arrays.asList(302, 301)); + @DataProvider(name = "lowhetData") + public Object[][] vcfsForNuMTFiltering() { + return new Object[][]{{ + Arrays.asList( + Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS), // SITE, + Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS), // SITE, + Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME+ ", " + GATKVCFConstants.LOW_HET_FILTER_NAME), // weak_evidence, low_allele_frac, possible_numt + Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS, GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME + ", " + GATKVCFConstants.LOW_HET_FILTER_NAME), //"SITE|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt,mt_many_low_hets, + Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS), // SITE, + Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS) // SITE, + )} + }; + } + + + @Test(dataProvider = "lowhetData") + public void testLowHetVariantsFiltered(List> expectedASFilters) { + final Set low_het_sites = new HashSet<>(Arrays.asList(301)); final File outputFile = createTempFile("low-het-test", ".vcf"); final ArgumentsBuilder argsBuilder = new ArgumentsBuilder() .addReference(MITO_REF.getAbsolutePath()) @@ -30,7 +47,12 @@ public void testLowHetVariantsFiltered() { Set variants = VariantContextTestUtils.streamVcf(outputFile) .filter(vcf -> vcf.getFilters().contains(GATKVCFConstants.LOW_HET_FILTER_NAME)).collect(Collectors.toSet()); Set actual_sites = variants.stream().map(var -> var.getStart()).collect(Collectors.toSet()); - Assert.assertEquals(actual_sites, low_het_sites, "exprected these sites " + actual_sites + " to have " + 
GATKVCFConstants.LOW_HET_FILTER_NAME + " filter."); + Assert.assertEquals(actual_sites, low_het_sites, "did not find the correct " + GATKVCFConstants.LOW_HET_FILTER_NAME + " site filters."); + + final List> actualASFilters = VariantContextTestUtils.streamVcf(outputFile) + .map(vc -> AnnotationUtils.decodeAnyASListWithRawDelim(vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, ""))).collect(Collectors.toList()); + Assert.assertEquals(actualASFilters, expectedASFilters); + } @Test diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf index 1cb074133f7..17d97f1dbd2 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/filtered.vcf @@ -60,9 +60,9 @@ ##source=FilterMutectCalls ##source=Mutect2 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chrM 152 . T C . PASS AS_FilterStatus=.;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true -chrM 263 . A G . numt_chimera AS_FilterStatus=.;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +chrM 152 . T C . PASS AS_FilterStatus=SITE;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +chrM 263 . A G . PASS AS_FilterStatus=SITE;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 chrM 301 . A AC . 
low_allele_frac;possible_numt;weak_evidence AS_FilterStatus=weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 -chrM 302 . A AC,C,ACC . PASS AS_FilterStatus=.|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 -chrM 310 . T TC . PASS AS_FilterStatus=.;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 -chrM 750 . A G . PASS AS_FilterStatus=.;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true +chrM 302 . A AC,C,ACC . PASS AS_FilterStatus=SITE|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +chrM 310 . T TC . PASS AS_FilterStatus=SITE;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +chrM 750 . A G . 
PASS AS_FilterStatus=SITE;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true From 711ae07099bbaa422c0cbc9a2b3f838ba43dfcc4 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 17 Mar 2020 10:29:17 -0400 Subject: [PATCH 63/85] update the passing indicator to SITE --- .../variant/GATKVariantContextUtils.java | 5 +- .../expected_split_with_AS_filters.vcf | 80 +++++++++---------- .../test_split_with_AS_filters.vcf | 76 +++++++++--------- 3 files changed, 81 insertions(+), 80 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index 66b46da831f..6be0eafd26f 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -1428,7 +1428,8 @@ public static List splitSomaticVariantContextToBiallelics(final // now add the allele specific filters to the variant context String filters = (String) attributes.get(GATKVCFConstants.AS_FILTER_STATUS_KEY); - if (filters != null && !filters.isEmpty() && !filters.equals(VCFConstants.EMPTY_INFO_FIELD) && !filters.equals((VCFConstants.PASSES_FILTERS_v4))) { + // checking for . and PASS should be able to be removed. 
these were temporarily used to indicate no allele specific filter + if (filters != null && !filters.isEmpty() && !filters.equals(VCFConstants.EMPTY_INFO_FIELD) && !filters.equals(GATKVCFConstants.SITE_LEVEL_FILTERS) && !filters.equals((VCFConstants.PASSES_FILTERS_v4))) { AnnotationUtils.decodeAnyASList(filters).stream().forEach(filter -> builder.filter(filter)); } @@ -1452,7 +1453,7 @@ public static void splitASSBTable(VariantContext vc, List> a public static void splitASFilters(VariantContext vc, List> attrsByAllele) { // the reason we are getting as list and then joining on , is because the default getAttributeAsString for a list will add spaces between items which we don't // want to have to trim out later in the code - String asfiltersStr = String.join(",", vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_FILTER_STATUS_KEY, VCFConstants.EMPTY_INFO_FIELD)); + String asfiltersStr = String.join(",", vc.getCommonInfo().getAttributeAsStringList(GATKVCFConstants.AS_FILTER_STATUS_KEY, GATKVCFConstants.SITE_LEVEL_FILTERS)); List filtersList = AnnotationUtils.decodeAnyASListWithRawDelim(asfiltersStr); new IndexRange(0, filtersList.size()).forEach(i -> attrsByAllele.get(i).put(GATKVCFConstants.AS_FILTER_STATUS_KEY, filtersList.get(i))); } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf index 79831aa7fa2..2759d23b0e4 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_split_with_AS_filters.vcf @@ -75,8 +75,8 @@ ##source=FilterMutectCalls ##tumor_sample=01C05110 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 
01C05110 -chrM 73 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,1|4689,4744;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31111.83 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 -chrM 263 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,6|2124,4436;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21614.00 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 +chrM 73 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5,1|4689,4744;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31111.83 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 +chrM 263 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=0,6|2124,4436;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21614.00 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 chrM 301 . A ACCC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=906,3426|3,158;DP=5348;ECNT=4;MBQ=20,20;MFRL=393,387;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=7.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:4332,161:6.324e-03:5026:1397,51:1767,86:906,3426,3,691 chrM 301 . A ACC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=906,3426|0,533;DP=5348;ECNT=4;MBQ=20,20;MFRL=393,398;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=70.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:4332,533:0.040:5026:1397,226:1767,234:906,3426,3,691 chrM 302 . A AC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|2,216;DP=5359;ECNT=4;MBQ=10,20;MFRL=396,396;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=219.45 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/././././.:118,218:0.049:4311:31,78:25,109:103,15,473,3720 @@ -84,55 +84,55 @@ chrM 302 . A C . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_ chrM 302 . A ACC . 
blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|20,2315;DP=5359;ECNT=4;MBQ=10,20;MFRL=396,393;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=4415.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/././.:118,2335:0.637:4311:31,752:25,1245:103,15,473,3720 chrM 302 . A ACCC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=103,15|4,392;DP=5359;ECNT=4;MBQ=10,20;MFRL=396,387;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=421.50 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/./.:118,396:0.098:4311:31,135:25,195:103,15,473,3720 chrM 302 . A ACCCC . blacklisted_site;possible_numt;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,strand_bias,possible_numt;AS_SB_TABLE=103,15|1,24;DP=5359;ECNT=4;MBQ=10,20;MFRL=396,379;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=5.38 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1/.:118,25:4.163e-03:4311:31,7:25,11:103,15,473,3720 -chrM 302 . A ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=103,15|433,0;DP=5359;ECNT=4;MBQ=10,20;MFRL=396,394;MMQ=60,60;MPOS=8;OCM=0;POPAF=2.40;TLOD=111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././././1:118,433:0.071:4311:31,153:25,150:103,15,473,3720 +chrM 302 . A ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=SITE;AS_SB_TABLE=103,15|433,0;DP=5359;ECNT=4;MBQ=10,20;MFRL=396,394;MMQ=60,60;MPOS=8;OCM=0;POPAF=2.40;TLOD=111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././././1:118,433:0.071:4311:31,153:25,150:103,15,473,3720 chrM 310 . T C . base_qual;blacklisted_site;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=1,2|357,46;DP=5034;ECNT=4;MBQ=20,2;MFRL=159,398;MMQ=60,60;MPOS=4;OCM=0;POPAF=2.40;TLOD=95.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:3,403:0.049:4602:3,66:0,82:1,2,563,4036 chrM 310 . T TC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=1,2|206,3990;DP=5034;ECNT=4;MBQ=20,30;MFRL=159,393;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=12037.35 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:3,4196:0.950:4602:3,1356:0,1920:1,2,563,4036 -chrM 499 . G A . 
PASS AS_FilterStatus=.;AS_SB_TABLE=2,8|3987,4458;DP=8755;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24353.87 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8445:1.000:8455:4,3065:2,4110:2,8,3987,4458 -chrM 750 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,4|5187,5182;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34231.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 +chrM 499 . G A . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,8|3987,4458;DP=8755;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24353.87 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8445:1.000:8455:4,3065:2,4110:2,8,3987,4458 +chrM 750 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5,4|5187,5182;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34231.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 chrM 824 . T C . possible_numt;strand_bias AS_FilterStatus=strand_bias,possible_numt;AS_SB_TABLE=5090,5150|20,24;DP=10560;ECNT=3;MBQ=30,30;MFRL=398,401;MMQ=60,60;MPOS=43;OCM=0;POPAF=2.40;TLOD=23.47 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10240,44:3.440e-03:10284:4925,18:5163,23:5090,5150,20,24 -chrM 827 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5119,5160;DP=10567;ECNT=3;MBQ=20,30;MFRL=387,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34480.59 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10279:1.000:10286:1,4951:4,5144:2,5,5119,5160 -chrM 1438 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,4|5322,5192;DP=10800;ECNT=1;MBQ=20,30;MFRL=416,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35232.81 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10514:1.000:10518:0,5098:2,5234:0,4,5322,5192 -chrM 2706 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,1|5027,5476;DP=10768;ECNT=1;MBQ=10,30;MFRL=406,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34736.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3,10503:1.000:10506:1,5177:0,5071:2,1,5027,5476 -chrM 3380 . G A . 
PASS AS_FilterStatus=.;AS_SB_TABLE=5228,4776|226,212;DP=10769;ECNT=1;MBQ=30,30;MFRL=396,398;MMQ=60,60;MPOS=39;OCM=0;POPAF=2.40;TLOD=682.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10004,438:0.040:10442:5044,209:4684,218:5228,4776,226,212 -chrM 3547 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,4|4781,5338;DP=10358;ECNT=4;MBQ=30,30;MFRL=425,394;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34104.77 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:4,10119:1.000:10123:1,4799:3,4869:0|1:3547_A_G:3547:0,4,4781,5338 -chrM 3552 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=4667,5218|89,95;DP=10345;ECNT=4;MBQ=30,30;MFRL=394,391;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=190.32 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9885,184:0.017:10069:4740,89:4936,83:0|1:3547_A_G:3547:4667,5218,89,95 +chrM 827 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,5|5119,5160;DP=10567;ECNT=3;MBQ=20,30;MFRL=387,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34480.59 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10279:1.000:10286:1,4951:4,5144:2,5,5119,5160 +chrM 1438 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=0,4|5322,5192;DP=10800;ECNT=1;MBQ=20,30;MFRL=416,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35232.81 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10514:1.000:10518:0,5098:2,5234:0,4,5322,5192 +chrM 2706 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,1|5027,5476;DP=10768;ECNT=1;MBQ=10,30;MFRL=406,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34736.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3,10503:1.000:10506:1,5177:0,5071:2,1,5027,5476 +chrM 3380 . G A . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5228,4776|226,212;DP=10769;ECNT=1;MBQ=30,30;MFRL=396,398;MMQ=60,60;MPOS=39;OCM=0;POPAF=2.40;TLOD=682.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10004,438:0.040:10442:5044,209:4684,218:5228,4776,226,212 +chrM 3547 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=0,4|4781,5338;DP=10358;ECNT=4;MBQ=30,30;MFRL=425,394;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34104.77 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:4,10119:1.000:10123:1,4799:3,4869:0|1:3547_A_G:3547:0,4,4781,5338 +chrM 3552 . T C . 
PASS AS_FilterStatus=SITE;AS_SB_TABLE=4667,5218|89,95;DP=10345;ECNT=4;MBQ=30,30;MFRL=394,391;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=190.32 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9885,184:0.017:10069:4740,89:4936,83:0|1:3547_A_G:3547:4667,5218,89,95 chrM 3565 . A C . base_qual;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=4647,4794|10,453;DP=10330;ECNT=4;MBQ=30,10;MFRL=394,401;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=22.50 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9441,463:6.928e-03:9904:3959,41:4613,20:0|1:3547_A_G:3547:4647,4794,10,453 chrM 3577 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=4405,5325|240,28;DP=10292;ECNT=4;MBQ=30,10;MFRL=395,394;MMQ=60,60;MPOS=42;OCM=0;POPAF=2.40;TLOD=1.90 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9730,268:2.324e-03:9998:3662,30:4408,8:0|1:3547_A_G:3547:4405,5325,240,28 -chrM 4769 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,4|5260,4910;DP=10474;ECNT=2;MBQ=20,30;MFRL=263,401;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33493.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10170:1.000:10179:2,5123:3,4727:5,4,5260,4910 -chrM 4820 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5561,4676;DP=10478;ECNT=2;MBQ=30,30;MFRL=397,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33900.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10237:1.000:10242:3,5326:1,4775:3,2,5561,4676 -chrM 4977 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=2,4|4601,5682;DP=10552;ECNT=1;MBQ=20,30;MFRL=290,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33756.90 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,10283:1.000:10289:3,5136:3,4901:2,4,4601,5682 -chrM 5629 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=5266,4764|129,114;DP=10567;ECNT=1;MBQ=30,30;MFRL=396,402;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=318.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10030,243:0.023:10273:5041,137:4759,101:5266,4764,129,114 -chrM 6473 . C T . 
PASS AS_FilterStatus=.;AS_SB_TABLE=6,2|5308,5177;DP=10740;ECNT=1;MBQ=20,30;MFRL=419,395;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34705.28 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:8,10485:1.000:10493:3,5076:1,5098:6,2,5308,5177 -chrM 6722 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5257,5112;DP=10644;ECNT=1;MBQ=20,30;MFRL=219,399;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34855.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10369:1.000:10374:3,5154:2,5065:3,2,5257,5112 -chrM 7028 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5252,5296;DP=10855;ECNT=1;MBQ=10,30;MFRL=400,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35041.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10548:1.000:10555:2,5363:0,5048:2,5,5252,5296 -chrM 7241 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5084,5262;DP=10661;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33382.84 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10346:1.000:10353:3,5191:0,4856:2,5,5084,5262 -chrM 8270 . CACCCCCTCT C . PASS AS_FilterStatus=.;AS_SB_TABLE=24,8|4147,4789;DP=9886;ECNT=1;MBQ=30,30;MFRL=419,406;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;RPA=2,1;RU=ACCCCCTCT;STR;TLOD=26951.12 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:32,8936:0.999:8968:14,4376:12,4326:24,8,4147,4789 -chrM 8736 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=4550,4495|778,675;DP=10790;ECNT=1;MBQ=30,30;MFRL=399,400;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=3047.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9045,1453:0.138:10498:4635,731:4237,699:4550,4495,778,675 -chrM 8860 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,6|5415,5158;DP=10877;ECNT=1;MBQ=30,30;MFRL=382,402;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=35027.25 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:11,10573:0.999:10584:4,5291:6,4983:5,6,5415,5158 -chrM 9098 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=6,1|5173,5354;DP=10803;ECNT=1;MBQ=20,30;MFRL=413,396;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35177.72 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10527:1.000:10534:3,5357:2,4995:6,1,5173,5354 -chrM 9950 . T C . 
PASS AS_FilterStatus=.;AS_SB_TABLE=1,4|4725,5702;DP=10681;ECNT=1;MBQ=30,30;MFRL=419,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34370.22 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10427:1.000:10432:2,5059:1,5128:1,4,4725,5702 +chrM 4769 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5,4|5260,4910;DP=10474;ECNT=2;MBQ=20,30;MFRL=263,401;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33493.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10170:1.000:10179:2,5123:3,4727:5,4,5260,4910 +chrM 4820 . G A . PASS AS_FilterStatus=SITE;AS_SB_TABLE=3,2|5561,4676;DP=10478;ECNT=2;MBQ=30,30;MFRL=397,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33900.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10237:1.000:10242:3,5326:1,4775:3,2,5561,4676 +chrM 4977 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,4|4601,5682;DP=10552;ECNT=1;MBQ=20,30;MFRL=290,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33756.90 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,10283:1.000:10289:3,5136:3,4901:2,4,4601,5682 +chrM 5629 . C T . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5266,4764|129,114;DP=10567;ECNT=1;MBQ=30,30;MFRL=396,402;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=318.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10030,243:0.023:10273:5041,137:4759,101:5266,4764,129,114 +chrM 6473 . C T . PASS AS_FilterStatus=SITE;AS_SB_TABLE=6,2|5308,5177;DP=10740;ECNT=1;MBQ=20,30;MFRL=419,395;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34705.28 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:8,10485:1.000:10493:3,5076:1,5098:6,2,5308,5177 +chrM 6722 . G A . PASS AS_FilterStatus=SITE;AS_SB_TABLE=3,2|5257,5112;DP=10644;ECNT=1;MBQ=20,30;MFRL=219,399;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34855.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10369:1.000:10374:3,5154:2,5065:3,2,5257,5112 +chrM 7028 . C T . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,5|5252,5296;DP=10855;ECNT=1;MBQ=10,30;MFRL=400,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35041.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10548:1.000:10555:2,5363:0,5048:2,5,5252,5296 +chrM 7241 . A G . 
PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,5|5084,5262;DP=10661;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33382.84 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10346:1.000:10353:3,5191:0,4856:2,5,5084,5262 +chrM 8270 . CACCCCCTCT C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=24,8|4147,4789;DP=9886;ECNT=1;MBQ=30,30;MFRL=419,406;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;RPA=2,1;RU=ACCCCCTCT;STR;TLOD=26951.12 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:32,8936:0.999:8968:14,4376:12,4326:24,8,4147,4789 +chrM 8736 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=4550,4495|778,675;DP=10790;ECNT=1;MBQ=30,30;MFRL=399,400;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=3047.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9045,1453:0.138:10498:4635,731:4237,699:4550,4495,778,675 +chrM 8860 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5,6|5415,5158;DP=10877;ECNT=1;MBQ=30,30;MFRL=382,402;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=35027.25 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:11,10573:0.999:10584:4,5291:6,4983:5,6,5415,5158 +chrM 9098 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=6,1|5173,5354;DP=10803;ECNT=1;MBQ=20,30;MFRL=413,396;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35177.72 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10527:1.000:10534:3,5357:2,4995:6,1,5173,5354 +chrM 9950 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=1,4|4725,5702;DP=10681;ECNT=1;MBQ=30,30;MFRL=419,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34370.22 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10427:1.000:10432:2,5059:1,5128:1,4,4725,5702 chrM 10818 . AAC A . possible_numt AS_FilterStatus=possible_numt;AS_SB_TABLE=4903,5145|38,35;DP=10642;ECNT=1;MBQ=30,30;MFRL=398,413;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;RPA=2,1;RU=AC;STR;TLOD=125.93 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10048,73:7.543e-03:10121:5053,34:4549,37:4903,5145,38,35 -chrM 11177 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5589,4844;DP=10719;ECNT=1;MBQ=20,30;MFRL=402,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34411.63 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10433:1.000:10438:1,5387:2,4875:3,2,5589,4844 -chrM 11276 . T C . 
PASS AS_FilterStatus=.;AS_SB_TABLE=4746,4664|480,463;DP=10644;ECNT=1;MBQ=30,30;MFRL=396,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=1757.06 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9410,943:0.090:10353:4797,486:4475,427:4746,4664,480,463 -chrM 11719 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=4,1|5153,5407;DP=10841;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34816.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10560:1.000:10565:0,5342:1,4913:4,1,5153,5407 -chrM 13590 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=3,1|5146,5293;DP=10710;ECNT=2;MBQ=25,30;MFRL=420,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33992.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10439:1.000:10443:2,5111:1,4846:3,1,5146,5293 -chrM 13606 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5057,5000|175,155;DP=10682;ECNT=2;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=430.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10057,330:0.030:10387:5060,168:4774,151:5057,5000,175,155 -chrM 14766 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=7,6|5582,4740;DP=10635;ECNT=1;MBQ=10,30;MFRL=404,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33392.24 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:13,10322:1.000:10335:2,5039:0,4691:7,6,5582,4740 -chrM 15326 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,2|5077,5355;DP=10698;ECNT=1;MBQ=15,30;MFRL=393,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34351.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10432:1.000:10436:1,5344:1,4710:2,2,5077,5355 -chrM 15535 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=1,13|5374,5027;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33834.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 +chrM 11177 . C T . PASS AS_FilterStatus=SITE;AS_SB_TABLE=3,2|5589,4844;DP=10719;ECNT=1;MBQ=20,30;MFRL=402,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34411.63 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10433:1.000:10438:1,5387:2,4875:3,2,5589,4844 +chrM 11276 . T C . 
PASS AS_FilterStatus=SITE;AS_SB_TABLE=4746,4664|480,463;DP=10644;ECNT=1;MBQ=30,30;MFRL=396,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=1757.06 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9410,943:0.090:10353:4797,486:4475,427:4746,4664,480,463 +chrM 11719 . G A . PASS AS_FilterStatus=SITE;AS_SB_TABLE=4,1|5153,5407;DP=10841;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34816.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10560:1.000:10565:0,5342:1,4913:4,1,5153,5407 +chrM 13590 . G A . PASS AS_FilterStatus=SITE;AS_SB_TABLE=3,1|5146,5293;DP=10710;ECNT=2;MBQ=25,30;MFRL=420,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33992.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10439:1.000:10443:2,5111:1,4846:3,1,5146,5293 +chrM 13606 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5057,5000|175,155;DP=10682;ECNT=2;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=430.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10057,330:0.030:10387:5060,168:4774,151:5057,5000,175,155 +chrM 14766 . C T . PASS AS_FilterStatus=SITE;AS_SB_TABLE=7,6|5582,4740;DP=10635;ECNT=1;MBQ=10,30;MFRL=404,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33392.24 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:13,10322:1.000:10335:2,5039:0,4691:7,6,5582,4740 +chrM 15326 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,2|5077,5355;DP=10698;ECNT=1;MBQ=15,30;MFRL=393,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34351.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10432:1.000:10436:1,5344:1,4710:2,2,5077,5355 +chrM 15535 . C T . PASS AS_FilterStatus=SITE;AS_SB_TABLE=1,13|5374,5027;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33834.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 chrM 16149 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=1731,3292|5,170;DP=5406;ECNT=8;MBQ=30,10;MFRL=396,396;MMQ=60,60;MPOS=49;OCM=0;POPAF=2.40;TLOD=3.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5023,175:3.489e-03:5198:2065,10:2653,12:1731,3292,5,170 chrM 16175 . A C . 
base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=903,2767|6,185;DP=4056;ECNT=8;MBQ=20,10;MFRL=396,401;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=5.16 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3670,191:6.729e-03:3861:1180,16:2025,12:903,2767,6,185 -chrM 16179 . CAA C . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=201,1279|160,487;DP=3867;ECNT=8;MBQ=30,30;MFRL=397,396;MMQ=60,60;MPOS=29;OCM=0;POPAF=2.40;RPA=4,2;RU=A;STR;TLOD=926.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./.:1480,647:0.272:2701:462,165:869,424:201,1279,206,1015 -chrM 16179 . CA C . blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=201,1279|44,264;DP=3867;ECNT=8;MBQ=30,30;MFRL=397,397;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;RPA=4,3;RU=A;STR;TLOD=330.32 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/.:1480,308:0.107:2701:462,117:869,175:201,1279,206,1015 +chrM 16179 . CAA C . blacklisted_site AS_FilterStatus=SITE;AS_SB_TABLE=201,1279|160,487;DP=3867;ECNT=8;MBQ=30,30;MFRL=397,396;MMQ=60,60;MPOS=29;OCM=0;POPAF=2.40;RPA=4,2;RU=A;STR;TLOD=926.74 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./.:1480,647:0.272:2701:462,165:869,424:201,1279,206,1015 +chrM 16179 . CA C . blacklisted_site AS_FilterStatus=SITE;AS_SB_TABLE=201,1279|44,264;DP=3867;ECNT=8;MBQ=30,30;MFRL=397,397;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;RPA=4,3;RU=A;STR;TLOD=330.32 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/.:1480,308:0.107:2701:462,117:869,175:201,1279,206,1015 chrM 16179 . CAAA C . blacklisted_site;strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=201,1279|2,264;DP=3867;ECNT=8;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=38;OCM=0;POPAF=2.40;RPA=4,1;RU=A;STR;TLOD=75.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1:1480,266:0.057:2701:462,48:869,191:201,1279,206,1015 chrM 16181 . A C . base_qual;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=662,1960|27,694;DP=3715;ECNT=8;MBQ=20,10;MFRL=396,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=65.50 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:2622,721:0.157:3343:700,60:1279,245:662,1960,27,694 -chrM 16182 . A C . 
blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=246,976|137,1340;DP=3647;ECNT=8;MBQ=20,30;MFRL=397,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2944.26 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1222,1477:0.559:2699:233,442:621,872:246,976,137,1340 -chrM 16183 . A C . PASS AS_FilterStatus=.;AS_SB_TABLE=57,40|175,1604;DP=3634;ECNT=8;MBQ=20,30;MFRL=389,397;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=4075.79 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./././.:97,1779:0.698:2656:26,572:31,1049:57,40,306,2253 -chrM 16183 . A ACCC . PASS AS_FilterStatus=.;AS_SB_TABLE=57,40|56,357;DP=3634;ECNT=8;MBQ=20,20;MFRL=389,396;MMQ=60,60;MPOS=-2147483648;OCM=0;POPAF=2.40;TLOD=494.17 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/././.:97,413:0.165:2656:26,52:31,278:57,40,306,2253 +chrM 16182 . A C . blacklisted_site AS_FilterStatus=SITE;AS_SB_TABLE=246,976|137,1340;DP=3647;ECNT=8;MBQ=20,30;MFRL=397,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2944.26 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1222,1477:0.559:2699:233,442:621,872:246,976,137,1340 +chrM 16183 . A C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=57,40|175,1604;DP=3634;ECNT=8;MBQ=20,30;MFRL=389,397;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=4075.79 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/./././.:97,1779:0.698:2656:26,572:31,1049:57,40,306,2253 +chrM 16183 . A ACCC . PASS AS_FilterStatus=SITE;AS_SB_TABLE=57,40|56,357;DP=3634;ECNT=8;MBQ=20,20;MFRL=389,396;MMQ=60,60;MPOS=-2147483648;OCM=0;POPAF=2.40;TLOD=494.17 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1/././.:97,413:0.165:2656:26,52:31,278:57,40,306,2253 chrM 16183 . A ACCCC . strand_bias AS_FilterStatus=strand_bias;AS_SB_TABLE=57,40|5,241;DP=3634;ECNT=8;MBQ=20,20;MFRL=389,393;MMQ=60,60;MPOS=21;OCM=0;POPAF=2.40;TLOD=190.34 GT:AD:AF:DP:F1R2:F2R1:SB 0/././1/./.:97,246:0.101:2656:26,22:31,177:57,40,306,2253 chrM 16183 . A ACCCCC . 
possible_numt;weak_evidence AS_FilterStatus=weak_evidence,possible_numt;AS_SB_TABLE=57,40|0,51;DP=3634;ECNT=8;MBQ=20,20;MFRL=389,377;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=2.16 GT:AD:AF:DP:F1R2:F2R1:SB 0/./././1/.:97,51:5.638e-03:2656:26,2:31,36:57,40,306,2253 chrM 16183 . A ACCCCCCCCCCCCCCCC . possible_numt AS_FilterStatus=possible_numt;AS_SB_TABLE=57,40|70,0;DP=3634;ECNT=8;MBQ=20,20;MFRL=389,402;MMQ=60,60;MPOS=-2147483648;OCM=0;POPAF=2.40;TLOD=10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/././././1:97,70:8.474e-03:2656:26,19:31,19:57,40,306,2253 -chrM 16189 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=1,2|596,2826;DP=3540;ECNT=8;MBQ=30,30;MFRL=414,397;MMQ=60,60;MPOS=25;OCM=0;POPAF=2.40;TLOD=14669.67 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:3,3422:0.998:3431:2,1023:1,2014:1,2,601,2827 +chrM 16189 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=1,2|596,2826;DP=3540;ECNT=8;MBQ=30,30;MFRL=414,397;MMQ=60,60;MPOS=25;OCM=0;POPAF=2.40;TLOD=14669.67 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/.:3,3422:0.998:3431:2,1023:1,2014:1,2,601,2827 chrM 16189 . T A . possible_numt;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,strand_bias,possible_numt;AS_SB_TABLE=1,2|5,1;DP=3540;ECNT=8;MBQ=30,25;MFRL=414,369;MMQ=60,60;MPOS=4;OCM=0;POPAF=2.40;TLOD=3.68 GT:AD:AF:DP:F1R2:F2R1:SB 0/./1:3,6:1.168e-03:3431:2,2:1,2:1,2,601,2827 -chrM 16217 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=2,2|1114,3004;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14884.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 -chrM 16519 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=3,3|4970,4301;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30665.46 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 +chrM 16217 . T C . 
PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,2|1114,3004;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14884.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 +chrM 16519 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=3,3|4970,4301;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30665.46 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf index 7ec5bd00085..53de1784190 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_split_with_AS_filters.vcf @@ -72,50 +72,50 @@ ##source=VariantFiltration ##tumor_sample=01C05110 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 01C05110 -chrM 73 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,1|4689,4744;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31111.83 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 -chrM 263 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,6|2124,4436;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21614.00 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 +chrM 73 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5,1|4689,4744;DP=9619;ECNT=1;MBQ=15,30;MFRL=394,396;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=31111.83 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9433:1.000:9439:2,4683:1,4510:5,1,4689,4744 +chrM 263 . A G . 
PASS AS_FilterStatus=SITE;AS_SB_TABLE=0,6|2124,4436;DP=6761;ECNT=4;MBQ=20,30;MFRL=411,395;MMQ=60,60;MPOS=33;OCM=0;POPAF=2.40;TLOD=21614.00 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,6560:1.000:6566:3,2777:1,3265:0,6,2124,4436 chrM 301 . A ACCC,ACC . blacklisted_site;strand_bias AS_FilterStatus=strand_bias|strand_bias;AS_SB_TABLE=906,3426|3,158|0,533;DP=5348;ECNT=4;MBQ=20,20,20;MFRL=393,387,398;MMQ=60,60,60;MPOS=34,33;OCM=0;POPAF=2.40,2.40;TLOD=7.86,70.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:4332,161,533:6.324e-03,0.040:5026:1397,51,226:1767,86,234:906,3426,3,691 -chrM 302 . A AC,C,ACC,ACCC,ACCCC,ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=strand_bias|strand_bias|strand_bias|strand_bias|weak_evidence,strand_bias,possible_numt|.;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3/4/5/6:118,218,786,2335,396,25,433:0.049,0.134,0.637,0.098,4.163e-03,0.071:4311:31,78,364,752,135,7,153:25,109,386,1245,195,11,150:103,15,473,3720 +chrM 302 . A AC,C,ACC,ACCC,ACCCC,ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC . blacklisted_site AS_FilterStatus=strand_bias|strand_bias|strand_bias|strand_bias|weak_evidence,strand_bias,possible_numt|SITE;AS_SB_TABLE=103,15|2,216|13,773|20,2315|4,392|1,24|433,0;DP=5359;ECNT=4;MBQ=10,20,30,20,20,20,20;MFRL=396,396,396,393,387,379,394;MMQ=60,60,60,60,60,60,60;MPOS=34,34,37,34,32,8;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40,2.40;TLOD=219.45,423.29,4415.54,421.50,5.38,111.58 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3/4/5/6:118,218,786,2335,396,25,433:0.049,0.134,0.637,0.098,4.163e-03,0.071:4311:31,78,364,752,135,7,153:25,109,386,1245,195,11,150:103,15,473,3720 chrM 310 . T C,TC . 
blacklisted_site;strand_bias AS_FilterStatus=base_qual,strand_bias|strand_bias;AS_SB_TABLE=1,2|357,46|206,3990;DP=5034;ECNT=4;MBQ=20,2,30;MFRL=159,398,393;MMQ=60,60,60;MPOS=4,34;OCM=0;POPAF=2.40,2.40;TLOD=95.66,12037.35 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:3,403,4196:0.049,0.950:4602:3,66,1356:0,82,1920:1,2,563,4036 -chrM 499 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=2,8|3987,4458;DP=8755;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24353.87 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8445:1.000:8455:4,3065:2,4110:2,8,3987,4458 -chrM 750 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,4|5187,5182;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34231.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 +chrM 499 . G A . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,8|3987,4458;DP=8755;ECNT=1;MBQ=20,30;MFRL=347,395;MMQ=60,60;MPOS=34;OCM=0;POPAF=2.40;TLOD=24353.87 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10,8445:1.000:8455:4,3065:2,4110:2,8,3987,4458 +chrM 750 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5,4|5187,5182;DP=10633;ECNT=3;MBQ=10,30;MFRL=407,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34231.18 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10369:1.000:10378:2,4823:1,5267:5,4,5187,5182 chrM 824 . T C . possible_numt;strand_bias AS_FilterStatus=strand_bias,possible_numt;AS_SB_TABLE=5090,5150|20,24;DP=10560;ECNT=3;MBQ=30,30;MFRL=398,401;MMQ=60,60;MPOS=43;OCM=0;POPAF=2.40;TLOD=23.47 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10240,44:3.440e-03:10284:4925,18:5163,23:5090,5150,20,24 -chrM 827 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5119,5160;DP=10567;ECNT=3;MBQ=20,30;MFRL=387,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34480.59 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10279:1.000:10286:1,4951:4,5144:2,5,5119,5160 -chrM 1438 . A G . 
PASS AS_FilterStatus=.;AS_SB_TABLE=0,4|5322,5192;DP=10800;ECNT=1;MBQ=20,30;MFRL=416,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35232.81 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10514:1.000:10518:0,5098:2,5234:0,4,5322,5192 -chrM 2706 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,1|5027,5476;DP=10768;ECNT=1;MBQ=10,30;MFRL=406,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34736.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3,10503:1.000:10506:1,5177:0,5071:2,1,5027,5476 -chrM 3380 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=5228,4776|226,212;DP=10769;ECNT=1;MBQ=30,30;MFRL=396,398;MMQ=60,60;MPOS=39;OCM=0;POPAF=2.40;TLOD=682.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10004,438:0.040:10442:5044,209:4684,218:5228,4776,226,212 -chrM 3547 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=0,4|4781,5338;DP=10358;ECNT=4;MBQ=30,30;MFRL=425,394;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34104.77 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:4,10119:1.000:10123:1,4799:3,4869:0|1:3547_A_G:3547:0,4,4781,5338 -chrM 3552 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=4667,5218|89,95;DP=10345;ECNT=4;MBQ=30,30;MFRL=394,391;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=190.32 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9885,184:0.017:10069:4740,89:4936,83:0|1:3547_A_G:3547:4667,5218,89,95 +chrM 827 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,5|5119,5160;DP=10567;ECNT=3;MBQ=20,30;MFRL=387,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34480.59 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10279:1.000:10286:1,4951:4,5144:2,5,5119,5160 +chrM 1438 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=0,4|5322,5192;DP=10800;ECNT=1;MBQ=20,30;MFRL=416,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35232.81 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10514:1.000:10518:0,5098:2,5234:0,4,5322,5192 +chrM 2706 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,1|5027,5476;DP=10768;ECNT=1;MBQ=10,30;MFRL=406,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34736.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3,10503:1.000:10506:1,5177:0,5071:2,1,5027,5476 +chrM 3380 . G A . 
PASS AS_FilterStatus=SITE;AS_SB_TABLE=5228,4776|226,212;DP=10769;ECNT=1;MBQ=30,30;MFRL=396,398;MMQ=60,60;MPOS=39;OCM=0;POPAF=2.40;TLOD=682.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10004,438:0.040:10442:5044,209:4684,218:5228,4776,226,212 +chrM 3547 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=0,4|4781,5338;DP=10358;ECNT=4;MBQ=30,30;MFRL=425,394;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34104.77 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:4,10119:1.000:10123:1,4799:3,4869:0|1:3547_A_G:3547:0,4,4781,5338 +chrM 3552 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=4667,5218|89,95;DP=10345;ECNT=4;MBQ=30,30;MFRL=394,391;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=190.32 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9885,184:0.017:10069:4740,89:4936,83:0|1:3547_A_G:3547:4667,5218,89,95 chrM 3565 . A C . base_qual;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=4647,4794|10,453;DP=10330;ECNT=4;MBQ=30,10;MFRL=394,401;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=22.50 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9441,463:6.928e-03:9904:3959,41:4613,20:0|1:3547_A_G:3547:4647,4794,10,453 chrM 3577 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=4405,5325|240,28;DP=10292;ECNT=4;MBQ=30,10;MFRL=395,394;MMQ=60,60;MPOS=42;OCM=0;POPAF=2.40;TLOD=1.90 GT:AD:AF:DP:F1R2:F2R1:PGT:PID:PS:SB 0|1:9730,268:2.324e-03:9998:3662,30:4408,8:0|1:3547_A_G:3547:4405,5325,240,28 -chrM 4769 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5,4|5260,4910;DP=10474;ECNT=2;MBQ=20,30;MFRL=263,401;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33493.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10170:1.000:10179:2,5123:3,4727:5,4,5260,4910 -chrM 4820 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5561,4676;DP=10478;ECNT=2;MBQ=30,30;MFRL=397,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33900.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10237:1.000:10242:3,5326:1,4775:3,2,5561,4676 -chrM 4977 . T C . 
PASS AS_FilterStatus=.;AS_SB_TABLE=2,4|4601,5682;DP=10552;ECNT=1;MBQ=20,30;MFRL=290,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33756.90 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,10283:1.000:10289:3,5136:3,4901:2,4,4601,5682 -chrM 5629 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=5266,4764|129,114;DP=10567;ECNT=1;MBQ=30,30;MFRL=396,402;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=318.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10030,243:0.023:10273:5041,137:4759,101:5266,4764,129,114 -chrM 6473 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=6,2|5308,5177;DP=10740;ECNT=1;MBQ=20,30;MFRL=419,395;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34705.28 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:8,10485:1.000:10493:3,5076:1,5098:6,2,5308,5177 -chrM 6722 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5257,5112;DP=10644;ECNT=1;MBQ=20,30;MFRL=219,399;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34855.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10369:1.000:10374:3,5154:2,5065:3,2,5257,5112 -chrM 7028 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5252,5296;DP=10855;ECNT=1;MBQ=10,30;MFRL=400,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35041.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10548:1.000:10555:2,5363:0,5048:2,5,5252,5296 -chrM 7241 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,5|5084,5262;DP=10661;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33382.84 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10346:1.000:10353:3,5191:0,4856:2,5,5084,5262 -chrM 8270 . CACCCCCTCT C . PASS AS_FilterStatus=.;AS_SB_TABLE=24,8|4147,4789;DP=9886;ECNT=1;MBQ=30,30;MFRL=419,406;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;RPA=2,1;RU=ACCCCCTCT;STR;TLOD=26951.12 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:32,8936:0.999:8968:14,4376:12,4326:24,8,4147,4789 -chrM 8736 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=4550,4495|778,675;DP=10790;ECNT=1;MBQ=30,30;MFRL=399,400;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=3047.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9045,1453:0.138:10498:4635,731:4237,699:4550,4495,778,675 -chrM 8860 . A G . 
PASS AS_FilterStatus=.;AS_SB_TABLE=5,6|5415,5158;DP=10877;ECNT=1;MBQ=30,30;MFRL=382,402;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=35027.25 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:11,10573:0.999:10584:4,5291:6,4983:5,6,5415,5158 -chrM 9098 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=6,1|5173,5354;DP=10803;ECNT=1;MBQ=20,30;MFRL=413,396;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35177.72 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10527:1.000:10534:3,5357:2,4995:6,1,5173,5354 -chrM 9950 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=1,4|4725,5702;DP=10681;ECNT=1;MBQ=30,30;MFRL=419,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34370.22 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10427:1.000:10432:2,5059:1,5128:1,4,4725,5702 +chrM 4769 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5,4|5260,4910;DP=10474;ECNT=2;MBQ=20,30;MFRL=263,401;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33493.03 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9,10170:1.000:10179:2,5123:3,4727:5,4,5260,4910 +chrM 4820 . G A . PASS AS_FilterStatus=SITE;AS_SB_TABLE=3,2|5561,4676;DP=10478;ECNT=2;MBQ=30,30;MFRL=397,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33900.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10237:1.000:10242:3,5326:1,4775:3,2,5561,4676 +chrM 4977 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,4|4601,5682;DP=10552;ECNT=1;MBQ=20,30;MFRL=290,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33756.90 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,10283:1.000:10289:3,5136:3,4901:2,4,4601,5682 +chrM 5629 . C T . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5266,4764|129,114;DP=10567;ECNT=1;MBQ=30,30;MFRL=396,402;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;TLOD=318.19 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10030,243:0.023:10273:5041,137:4759,101:5266,4764,129,114 +chrM 6473 . C T . PASS AS_FilterStatus=SITE;AS_SB_TABLE=6,2|5308,5177;DP=10740;ECNT=1;MBQ=20,30;MFRL=419,395;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34705.28 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:8,10485:1.000:10493:3,5076:1,5098:6,2,5308,5177 +chrM 6722 . G A . 
PASS AS_FilterStatus=SITE;AS_SB_TABLE=3,2|5257,5112;DP=10644;ECNT=1;MBQ=20,30;MFRL=219,399;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34855.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10369:1.000:10374:3,5154:2,5065:3,2,5257,5112 +chrM 7028 . C T . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,5|5252,5296;DP=10855;ECNT=1;MBQ=10,30;MFRL=400,398;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35041.36 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10548:1.000:10555:2,5363:0,5048:2,5,5252,5296 +chrM 7241 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,5|5084,5262;DP=10661;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33382.84 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10346:1.000:10353:3,5191:0,4856:2,5,5084,5262 +chrM 8270 . CACCCCCTCT C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=24,8|4147,4789;DP=9886;ECNT=1;MBQ=30,30;MFRL=419,406;MMQ=60,60;MPOS=41;OCM=0;POPAF=2.40;RPA=2,1;RU=ACCCCCTCT;STR;TLOD=26951.12 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:32,8936:0.999:8968:14,4376:12,4326:24,8,4147,4789 +chrM 8736 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=4550,4495|778,675;DP=10790;ECNT=1;MBQ=30,30;MFRL=399,400;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=3047.11 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9045,1453:0.138:10498:4635,731:4237,699:4550,4495,778,675 +chrM 8860 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5,6|5415,5158;DP=10877;ECNT=1;MBQ=30,30;MFRL=382,402;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=35027.25 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:11,10573:0.999:10584:4,5291:6,4983:5,6,5415,5158 +chrM 9098 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=6,1|5173,5354;DP=10803;ECNT=1;MBQ=20,30;MFRL=413,396;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=35177.72 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:7,10527:1.000:10534:3,5357:2,4995:6,1,5173,5354 +chrM 9950 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=1,4|4725,5702;DP=10681;ECNT=1;MBQ=30,30;MFRL=419,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34370.22 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10427:1.000:10432:2,5059:1,5128:1,4,4725,5702 chrM 10818 . AAC A . 
possible_numt AS_FilterStatus=possible_numt;AS_SB_TABLE=4903,5145|38,35;DP=10642;ECNT=1;MBQ=30,30;MFRL=398,413;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;RPA=2,1;RU=AC;STR;TLOD=125.93 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10048,73:7.543e-03:10121:5053,34:4549,37:4903,5145,38,35 -chrM 11177 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=3,2|5589,4844;DP=10719;ECNT=1;MBQ=20,30;MFRL=402,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34411.63 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10433:1.000:10438:1,5387:2,4875:3,2,5589,4844 -chrM 11276 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=4746,4664|480,463;DP=10644;ECNT=1;MBQ=30,30;MFRL=396,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=1757.06 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9410,943:0.090:10353:4797,486:4475,427:4746,4664,480,463 -chrM 11719 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=4,1|5153,5407;DP=10841;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34816.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10560:1.000:10565:0,5342:1,4913:4,1,5153,5407 -chrM 13590 . G A . PASS AS_FilterStatus=.;AS_SB_TABLE=3,1|5146,5293;DP=10710;ECNT=2;MBQ=25,30;MFRL=420,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33992.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10439:1.000:10443:2,5111:1,4846:3,1,5146,5293 -chrM 13606 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=5057,5000|175,155;DP=10682;ECNT=2;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=430.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10057,330:0.030:10387:5060,168:4774,151:5057,5000,175,155 -chrM 14766 . C T . PASS AS_FilterStatus=.;AS_SB_TABLE=7,6|5582,4740;DP=10635;ECNT=1;MBQ=10,30;MFRL=404,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33392.24 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:13,10322:1.000:10335:2,5039:0,4691:7,6,5582,4740 -chrM 15326 . A G . PASS AS_FilterStatus=.;AS_SB_TABLE=2,2|5077,5355;DP=10698;ECNT=1;MBQ=15,30;MFRL=393,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34351.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10432:1.000:10436:1,5344:1,4710:2,2,5077,5355 -chrM 15535 . C T . 
PASS AS_FilterStatus=.;AS_SB_TABLE=1,13|5374,5027;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33834.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 +chrM 11177 . C T . PASS AS_FilterStatus=SITE;AS_SB_TABLE=3,2|5589,4844;DP=10719;ECNT=1;MBQ=20,30;MFRL=402,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34411.63 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10433:1.000:10438:1,5387:2,4875:3,2,5589,4844 +chrM 11276 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=4746,4664|480,463;DP=10644;ECNT=1;MBQ=30,30;MFRL=396,396;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=1757.06 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:9410,943:0.090:10353:4797,486:4475,427:4746,4664,480,463 +chrM 11719 . G A . PASS AS_FilterStatus=SITE;AS_SB_TABLE=4,1|5153,5407;DP=10841;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=34816.51 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5,10560:1.000:10565:0,5342:1,4913:4,1,5153,5407 +chrM 13590 . G A . PASS AS_FilterStatus=SITE;AS_SB_TABLE=3,1|5146,5293;DP=10710;ECNT=2;MBQ=25,30;MFRL=420,397;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=33992.54 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10439:1.000:10443:2,5111:1,4846:3,1,5146,5293 +chrM 13606 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=5057,5000|175,155;DP=10682;ECNT=2;MBQ=30,30;MFRL=397,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=430.15 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:10057,330:0.030:10387:5060,168:4774,151:5057,5000,175,155 +chrM 14766 . C T . PASS AS_FilterStatus=SITE;AS_SB_TABLE=7,6|5582,4740;DP=10635;ECNT=1;MBQ=10,30;MFRL=404,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33392.24 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:13,10322:1.000:10335:2,5039:0,4691:7,6,5582,4740 +chrM 15326 . A G . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,2|5077,5355;DP=10698;ECNT=1;MBQ=15,30;MFRL=393,395;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=34351.86 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,10432:1.000:10436:1,5344:1,4710:2,2,5077,5355 +chrM 15535 . C T . 
PASS AS_FilterStatus=SITE;AS_SB_TABLE=1,13|5374,5027;DP=10670;ECNT=1;MBQ=10,30;MFRL=412,397;MMQ=60,60;MPOS=36;OCM=0;POPAF=2.40;TLOD=33834.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:14,10401:1.000:10415:2,5015:0,4788:1,13,5374,5027 chrM 16149 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=1731,3292|5,170;DP=5406;ECNT=8;MBQ=30,10;MFRL=396,396;MMQ=60,60;MPOS=49;OCM=0;POPAF=2.40;TLOD=3.39 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:5023,175:3.489e-03:5198:2065,10:2653,12:1731,3292,5,170 chrM 16175 . A C . base_qual;strand_bias;weak_evidence AS_FilterStatus=weak_evidence,base_qual,strand_bias;AS_SB_TABLE=903,2767|6,185;DP=4056;ECNT=8;MBQ=20,10;MFRL=396,401;MMQ=60,60;MPOS=35;OCM=0;POPAF=2.40;TLOD=5.16 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:3670,191:6.729e-03:3861:1180,16:2025,12:903,2767,6,185 -chrM 16179 . CAAA CA,CAA,C . blacklisted_site AS_FilterStatus=.|.|strand_bias;AS_SB_TABLE=201,1279|160,487|44,264|2,264;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=926.74,330.32,75.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3:1480,647,308,266:0.272,0.107,0.057:2701:462,165,117,48:869,424,175,191:201,1279,206,1015 +chrM 16179 . CAAA CA,CAA,C . blacklisted_site AS_FilterStatus=SITE|SITE|strand_bias;AS_SB_TABLE=201,1279|160,487|44,264|2,264;DP=3867;ECNT=8;MBQ=30,30,30,30;MFRL=397,396,397,395;MMQ=60,60,60,60;MPOS=29,34,38;OCM=0;POPAF=2.40,2.40,2.40;RPA=4,2,3,1;RU=A;STR;TLOD=926.74,330.32,75.66 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3:1480,647,308,266:0.272,0.107,0.057:2701:462,165,117,48:869,424,175,191:201,1279,206,1015 chrM 16181 . A C . base_qual;strand_bias AS_FilterStatus=base_qual,strand_bias;AS_SB_TABLE=662,1960|27,694;DP=3715;ECNT=8;MBQ=20,10;MFRL=396,398;MMQ=60,60;MPOS=37;OCM=0;POPAF=2.40;TLOD=65.50 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:2622,721:0.157:3343:700,60:1279,245:662,1960,27,694 -chrM 16182 . A C . 
blacklisted_site AS_FilterStatus=.;AS_SB_TABLE=246,976|137,1340;DP=3647;ECNT=8;MBQ=20,30;MFRL=397,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2944.26 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1222,1477:0.559:2699:233,442:621,872:246,976,137,1340 -chrM 16183 . A C,ACCC,ACCCC,ACCCCC,ACCCCCCCCCCCCCCCC . PASS AS_FilterStatus=.|.|strand_bias|weak_evidence,possible_numt|possible_numt;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3/4/5:97,1779,413,246,51,70:0.698,0.165,0.101,5.638e-03,8.474e-03:2656:26,572,52,22,2,19:31,1049,278,177,36,19:57,40,306,2253 -chrM 16189 . T C,A . PASS AS_FilterStatus=.|weak_evidence,strand_bias,possible_numt;AS_SB_TABLE=1,2|596,2826|5,1;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14669.67,3.68 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:3,3422,6:0.998,1.168e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 -chrM 16217 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=2,2|1114,3004;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14884.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 -chrM 16519 . T C . PASS AS_FilterStatus=.;AS_SB_TABLE=3,3|4970,4301;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30665.46 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 +chrM 16182 . A C . blacklisted_site AS_FilterStatus=SITE;AS_SB_TABLE=246,976|137,1340;DP=3647;ECNT=8;MBQ=20,30;MFRL=397,398;MMQ=60,60;MPOS=32;OCM=0;POPAF=2.40;TLOD=2944.26 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:1222,1477:0.559:2699:233,442:621,872:246,976,137,1340 +chrM 16183 . A C,ACCC,ACCCC,ACCCCC,ACCCCCCCCCCCCCCCC . 
PASS AS_FilterStatus=SITE|SITE|strand_bias|weak_evidence,possible_numt|possible_numt;AS_SB_TABLE=57,40|175,1604|56,357|5,241|0,51|70,0;DP=3634;ECNT=8;MBQ=20,30,20,20,20,20;MFRL=389,397,396,393,377,402;MMQ=60,60,60,60,60,60;MPOS=33,-2147483648,21,40,-2147483648;OCM=0;POPAF=2.40,2.40,2.40,2.40,2.40;TLOD=4075.79,494.17,190.34,2.16,10.57 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2/3/4/5:97,1779,413,246,51,70:0.698,0.165,0.101,5.638e-03,8.474e-03:2656:26,572,52,22,2,19:31,1049,278,177,36,19:57,40,306,2253 +chrM 16189 . T C,A . PASS AS_FilterStatus=SITE|weak_evidence,strand_bias,possible_numt;AS_SB_TABLE=1,2|596,2826|5,1;DP=3540;ECNT=8;MBQ=30,30,25;MFRL=414,397,369;MMQ=60,60,60;MPOS=25,4;OCM=0;POPAF=2.40,2.40;TLOD=14669.67,3.68 GT:AD:AF:DP:F1R2:F2R1:SB 0/1/2:3,3422,6:0.998,1.168e-03:3431:2,1023,2:1,2014,2:1,2,601,2827 +chrM 16217 . T C . PASS AS_FilterStatus=SITE;AS_SB_TABLE=2,2|1114,3004;DP=4262;ECNT=8;MBQ=30,30;MFRL=414,398;MMQ=60,60;MPOS=23;OCM=0;POPAF=2.40;TLOD=14884.95 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:4,4118:0.999:4122:2,1605:2,2427:2,2,1114,3004 +chrM 16519 . T C . 
PASS AS_FilterStatus=SITE;AS_SB_TABLE=3,3|4970,4301;DP=9544;ECNT=1;MBQ=30,30;MFRL=404,395;MMQ=60,60;MPOS=40;OCM=0;POPAF=2.40;TLOD=30665.46 GT:AD:AF:DP:F1R2:F2R1:SB 0/1:6,9271:1.000:9277:1,4311:3,4754:3,3,4970,4301 From 2068a7832f734b37011770590b05f10cad52cd92 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Wed, 18 Mar 2020 15:56:35 -0400 Subject: [PATCH 64/85] use already computed AF instead of recomputing in MT Low het tool --- .../MTLowHeteroplasmyFilterTool.java | 21 +++++++------------ ...ftAlignAndTrimVariantsIntegrationTest.java | 2 +- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java index aeb11df1599..a21baf0c5c6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/MTLowHeteroplasmyFilterTool.java @@ -15,6 +15,7 @@ import org.broadinstitute.hellbender.engine.TwoPassVariantWalker; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; +import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; import picard.cmdline.programgroups.VariantFilteringProgramGroup; import java.io.File; @@ -101,25 +102,17 @@ protected String filterName() { return GATKVCFConstants.LOW_HET_FILTER_NAME; } - public List getData(Genotype g) { - return Arrays.stream(g.getAD()).boxed().collect(Collectors.toList()); + public List getData(Genotype g) { + return Arrays.stream(VariantContextGetters.getAttributeAsDoubleArray(g, GATKVCFConstants.ALLELE_FRACTION_KEY, () -> null, Double.MAX_VALUE)).boxed().collect(Collectors.toList()); } protected boolean isSiteLowHeteroplasmy(VariantContext v) { - return 
v.getGenotypes().stream().map(g -> lowestAF(g)).min(Double::compareTo).orElse(0.0) < lowHetThreshold; + return !(v.getGenotypes().stream().flatMap(g -> getData(g).stream().filter(x -> x < lowHetThreshold)).collect(Collectors.toList()).isEmpty()); } protected List areAllelesArtifacts(final VariantContext vc) { - VariantContextBuilder vcb = new VariantContextBuilder(vc); - LinkedHashMap> dataByAllele = Mutect2AlleleFilter.getDataByAllele(vc, Genotype::hasAD, this::getData, null); - Integer total = dataByAllele.values().stream().map(alleleCounts -> alleleCounts.stream().max(Integer::compareTo).orElse(0)).mapToInt(Integer::intValue).sum(); - return dataByAllele.entrySet().stream() - .filter(entry -> !vc.getReference().equals(entry.getKey())) - .map(entry -> (entry.getValue().stream().max(Integer::compareTo).orElse(0) / (double) total) < lowHetThreshold).collect(Collectors.toList()); - } - - protected double lowestAF(Genotype g) { - List depths = getData(g); - return Collections.min(depths.subList(1, depths.size())) / (double) depths.stream().mapToInt(Integer::intValue).sum(); + LinkedHashMap> dataByAllele = Mutect2AlleleFilter.getAltDataByAllele(vc, g -> g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY), this::getData, null); + return dataByAllele.values().stream() + .map(afList -> afList.stream().max(Double::compareTo).orElse(0.0) < lowHetThreshold).collect(Collectors.toList()); } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java index 4890186870f..2cd55ee4f2b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java @@ -83,7 +83,7 @@ public void 
testSplitAllelesWithASFilters() throws IOException { + " --" + LeftAlignAndTrimVariants.KEEP_ORIGINAL_AC_LONG_NAME, Collections.singletonList(expectedOutputFile.toString()) ); - spec.executeTest("testLeftAlignment--" + expectedOutputFile.toString(), this); + spec.executeTest("testSplitAllelesWithASFilters--" + expectedOutputFile.toString(), this); } } From 3ae2795ab7124373023bf3a3391062f03831c7f3 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 23 Mar 2020 12:03:37 -0400 Subject: [PATCH 65/85] get tests working --- .../walkers/filters/VariantFiltration.java | 42 +++++++++++++++++-- .../mutect/filtering/AlleleFilterUtils.java | 27 ++++++++++-- .../VariantFiltrationIntegrationTest.java | 1 + .../testVariantFiltration_testMask1.vcf | 2 +- .../testVariantFiltration_testMask2.vcf | 2 +- .../testVariantFiltration_testMask3.vcf | 2 +- .../testVariantFiltration_testMask4.vcf | 35 ++++++++++++++++ .../filters/VariantFiltration/vcfexample2.vcf | 2 +- 8 files changed, 101 insertions(+), 12 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask4.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java index e6268dea943..09df78431f7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java @@ -11,6 +11,9 @@ import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.AlleleFilterUtils; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import 
org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import picard.cmdline.programgroups.VariantFilteringProgramGroup; import org.broadinstitute.hellbender.engine.*; import org.broadinstitute.hellbender.exceptions.UserException; @@ -22,6 +25,7 @@ import java.util.stream.Collectors; import static java.util.Collections.singleton; +import static org.broadinstitute.hellbender.utils.variant.GATKVCFConstants.AS_FILTER_STATUS_KEY; /** @@ -117,6 +121,7 @@ public final class VariantFiltration extends VariantWalker { public static final String INVERT_LONG_NAME = "invert-filter-expression"; public static final String INVERT_GT_LONG_NAME = "invert-genotype-filter-expression"; public static final String NO_CALL_GTS_LONG_NAME = "set-filtered-genotype-to-no-call"; + public static final String ALLELE_SPECIFIC_LONG_NAME = "apply-allele-specific-filters"; private static final String FILTER_DELIMITER = ";"; @@ -232,6 +237,9 @@ public final class VariantFiltration extends VariantWalker { @Argument(fullName=NO_CALL_GTS_LONG_NAME, optional=true, doc="Set filtered genotypes to no-call") public boolean setFilteredGenotypesToNocall = false; + @Argument(fullName=ALLELE_SPECIFIC_LONG_NAME, optional=true, doc="Set mask at the allele level") + public boolean applyForAllele = false; + // JEXL expressions for the filters private List filterExps; private List genotypeFilterExps; @@ -271,6 +279,9 @@ private void initializeVcfWriter() { // setup the header fields final Set hInfo = new LinkedHashSet<>(); hInfo.addAll(getHeaderForVariants().getMetaDataInInputOrder()); + if (applyForAllele) { + hInfo.add(new VCFInfoHeaderLine(AS_FILTER_STATUS_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.String, "Filter status for each allele, as assessed by ApplyRecalibration. 
Note that the VCF filter field will reflect the most lenient/sensitive status across all alleles.")); + } // need AC, AN and AF since output if set filtered genotypes to no-call // If setting filtered genotypes to no-call, then allele counts (AC, AN and AF ) will be recomputed and these annotations @@ -333,11 +344,34 @@ public void onTraversalStart() { @Override public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) { + if (applyForAllele) { + // TODO either put allele specific filters in filter or do a different merge so we get all the other flags processed correctly too + List filtered = splitMultiAllelics(variant).stream().map(vc -> filter(internalApply(vc, readsContext, ref, new FeatureContext(featureContext, new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd()))), featureContext)).collect(Collectors.toList()); + // now we need to add individual alleles + // get allele filters list + List> alleleFilters = filtered.stream().map(filteredvc -> filteredvc.getFilters()).collect(Collectors.toList()); + // convert List> to List> and encode + VariantContext filteredVC = AlleleFilterUtils.addAlleleFilters(variant, alleleFilters, invalidatePreviousFilters); + writer.add(filteredVC); + } else { + writer.add(filter(internalApply(variant, readsContext, ref, featureContext), featureContext)); + } + } + + // TODO change the name of this method + protected VariantContext internalApply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) { final VariantContext vc1 = invalidatePreviousFilters ? (new VariantContextBuilder(variant)).unfiltered().make() : variant; - final VariantContext vc = isMaskFilterPresent(vc1) ? vc1: addMaskIfCoversVariant(vc1, featureContext); + return isMaskFilterPresent(vc1) ? 
vc1: addMaskIfCoversVariant(vc1, featureContext); + } - filter(vc, featureContext); + private List splitMultiAllelics(VariantContext vc) { + List results = new ArrayList<>(); + final VariantContextBuilder vcb = new VariantContextBuilder("SimpleSplit", vc.getContig(), vc.getStart(), vc.getEnd(), + Arrays.asList(vc.getReference(), Allele.NO_CALL)); + vc.getAlternateAlleles().forEach(allele -> results.add(GATKVariantContextUtils.trimAlleles( + vcb.alleles(Arrays.asList(vc.getReference(), allele)).make(true), true, true))); + return results; } /** @@ -360,7 +394,7 @@ private boolean isMaskFilterPresent(final VariantContext vc) { return vc.getFilters() != null && vc.getFilters().contains(maskName); } - private void filter(final VariantContext vc, final FeatureContext featureContext) { + private VariantContext filter(final VariantContext vc, final FeatureContext featureContext) { final VariantContextBuilder builder = new VariantContextBuilder(vc); // make new Genotypes based on filters @@ -394,7 +428,7 @@ private void filter(final VariantContext vc, final FeatureContext featureContext builder.filters(filters); } - writer.add(builder.make()); + return builder.make(); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java index 529cec962c6..1d9cd78d98c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java @@ -1,16 +1,15 @@ package org.broadinstitute.hellbender.tools.walkers.mutect.filtering; import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.vcf.VCFConstants; import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation; import 
org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import shaded.cloud_nio.com.google.errorprone.annotations.Var; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.ListIterator; +import java.util.*; import java.util.stream.Collectors; public class AlleleFilterUtils { @@ -50,4 +49,24 @@ public static List addFilter(List currentFilters, String newFilt return updated; } } + + public static VariantContext addAlleleFilters(VariantContext vc, List> alleleFilters, boolean invalidatePreviousFilters) { + // TODO: should invalidatePreviousFilters apply to allele filters? probably TBD + String encodedFilters = AlleleFilterUtils.encodeASFilters(alleleFilters.stream().map( + af -> af.isEmpty() ? Collections.singletonList(GATKVCFConstants.SITE_LEVEL_FILTERS) : af.stream().collect(Collectors.toList())).collect(Collectors.toList())); + VariantContextBuilder vcb = new VariantContextBuilder(vc).attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, encodedFilters); + + Set siteFilters = alleleFilters.stream().skip(1) + .collect(()->new HashSet<>(alleleFilters.get(0)), Set::retainAll, Set::retainAll); + + if (invalidatePreviousFilters && !siteFilters.isEmpty()) { + vcb.filters(siteFilters); + } else if (siteFilters.isEmpty() && vcb.getFilters() == null) { + vcb.passFilters(); + } else { + // either siteFilters is not empty or vbc filter is not empty + siteFilters.forEach(filter -> vcb.filter(filter)); + } + return vcb.make(); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java index 0f4fe1814a0..fee71c6e677 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java @@ -48,6 +48,7 @@ public Object[][] masks() { {"foo", "--mask " + getToolTestDataDir() + "vcfexample2.vcf", "testVariantFiltration_testMask1.vcf"}, {"foo", "--mask " + new File(getToolTestDataDir() + "vcfMask.vcf").getAbsolutePath(), "testVariantFiltration_testMask2.vcf"}, {"foo", "--" + VariantFiltration.MASK_EXTENSION_LONG_NAME + " 10 --mask:VCF " + getToolTestDataDir() + "vcfMask.vcf", "testVariantFiltration_testMask3.vcf"}, + {"foo", "--apply-allele-specific-filters true --mask " + new File(getToolTestDataDir() + "vcfMask.vcf").getAbsolutePath(), "testVariantFiltration_testMask4.vcf"} }; } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask1.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask1.vcf index 1f178978ee2..a177892c835 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask1.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask1.vcf @@ -28,7 +28,7 @@ 1 10020453 . G A,T 48.53 foo AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. ./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . 
G T 74.83 foo AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 foo AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . A C,T 55.89 foo AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . 
AG A,TG 55.89 foo AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . G A,T 32.66 foo AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 foo AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 
1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 foo AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask2.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask2.vcf index 85d31336f1b..b270d119a64 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask2.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask2.vcf @@ -28,7 +28,7 @@ 1 10020453 . G A,T 48.53 PASS AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. ./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 
1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . G T 74.83 PASS AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . A C,T 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 
0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . AG A,TG 55.89 foo AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . G A,T 32.66 foo AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 PASS AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 
1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 foo AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask3.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask3.vcf index f8b3aa072f2..f36f0cce13b 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask3.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask3.vcf @@ -28,7 +28,7 @@ 1 10020453 . G A,T 48.53 PASS AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. ./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 
1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . G T 74.83 PASS AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . A C,T 55.89 foo AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 
0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . AG A,TG 55.89 foo AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . G A,T 32.66 foo AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 foo AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 
1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 foo AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask4.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask4.vcf new file mode 100644 index 00000000000..e4b8a000fb9 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMask4.vcf @@ -0,0 +1,35 @@ +##fileformat=VCFv4.2 +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##reference=human_b36_both.fasta +##source=UnifiedGenotyper +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA11894 NA11992 NA06994 NA07037 NA12760 NA12761 NA12414 NA12762 NA12716 NA12717 NA10851 NA11919 NA11918 NA07347 NA12873 NA12874 NA06986 NA06985 NA07346 NA12763 NA11994 NA11993 NA11995 NA11840 NA12234 NA07000 NA12003 NA07357 NA11920 NA12287 NA12144 NA07051 NA12828 NA12776 NA11831 NA12044 NA12045 NA11830 NA12872 NA12489 NA11832 NA12043 NA11881 NA12751 NA12750 
NA11931 NA12155 NA12154 NA12249 NA12156 NA12815 NA11829 NA12749 NA12812 NA12813 NA12814 NA12004 NA12005 NA10847 NA12006 +1 10020400 . C T 30.66 foo AF=0.03;AFrange=0.01-0.09,95%;AS_FilterStatus=foo;AlleleBalance=0.72;DoC=193;FisherStrand=9.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=177.45;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 0/1:1:6 0/0:2:6 0/0:1:2 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:1:2 0/0:2:6 0/0:1:3 0/0:2:9 0/0:1:4 ./. 0/0:1:3 0/1:2:4 0/0:1:2 0/0:3:10 ./. 0/0:1:3 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:5 0/0:0:1 0/0:1:3 0/0:1:5 0/1:2:4 0/0:3:9 0/0:3:9 0/0:1:2 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:1:4 0/0:1:2 0/0:0:1 0/0:1:3 0/1:5:5 0/0:1:2 0/0:1:2 0/0:2:8 0/1:3:3 0/0:1:3 0/0:1:2 0/0:2:5 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/0:2:5 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:4 0/0:0:1 +1 10020408 . C A 57.15 foo AF=0.05;AFrange=0.01-0.11,95%;AS_FilterStatus=foo;AlleleBalance=0.73;DoC=179;FisherStrand=17.9;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=174.11;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:1:4 1/1:0:1 0/0:1:3 ./. 0/0:0:1 0/0:1:3 0/0:1:2 0/0:2:6 0/0:1:3 0/0:3:9 0/0:1:2 ./. 0/0:1:3 1/0:3:4 0/0:1:2 0/0:2:8 ./. 0/0:1:4 0/0:1:3 0/0:1:2 0/0:0:1 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:6 0/0:0:1 0/0:1:3 0/0:2:7 1/0:2:5 0/0:1:4 0/0:2:8 0/0:1:3 ./. 0/0:1:2 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:6:5 0/0:1:3 0/0:1:2 0/0:2:5 1/0:3:3 0/0:1:3 0/0:0:1 0/0:2:5 ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:2:5 ./. 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:3 0/0:0:1 +1 10020416 . G A,T 40.12 PASS AF=0.04,0.00;AFrange=0.01-0.10,95%;AS_FilterStatus=SITE|SITE;AlleleBalance=0.73;DoC=166;FisherStrand=15.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.94;RMSMAPQ=176.69;SB=-0.01;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:1:3 0/0:0:1 0/0:1:3 ./. 0/0:0:1 0/0:1:3 2/2:0:1 0/0:2:8 0/0:1:3 0/0:2:6 ./. ./. 0/0:1:3 1/0:2:4 0/0:1:2 0/0:2:6 ./. 0/0:1:4 0/0:1:3 0/0:1:2 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:5 0/0:0:1 0/0:1:4 0/0:2:7 1/0:2:5 0/0:1:4 0/0:2:5 0/0:1:4 ./. 
0/0:0:1 0/0:1:3 0/0:1:4 ./. 0/0:1:3 0/0:0:1 1/1:0:2 1/0:6:5 0/0:1:3 0/0:0:1 0/0:1:4 1/0:3:3 0/0:1:2 0/0:1:4 0/0:1:4 0/0:1:3 0/0:1:2 0/0:0:1 0/0:0:1 0/0:1:4 0/0:1:3 0/0:1:3 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 +1 10020436 . A T 64.57 PASS AF=0.06;AFrange=0.01-0.12,95%;AS_FilterStatus=SITE;AlleleBalance=0.73;DoC=168;FisherStrand=3.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.66;SB=-1.53;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:5 ./. 0/0:1:3 0/0:1:2 0/0:0:1 0/0:1:3 ./. 0/0:1:5 0/0:1:2 0/0:3:11 0/0:1:2 ./. 0/0:1:2 0/1:3:4 0/0:1:2 0/1:3:8 ./. 0/0:0:4 0/0:1:4 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:3 ./. 0/0:1:2 0/0:1:2 0/1:1:8 0/0:1:5 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:2:6 0/0:0:1 0/1:3:3 0/0:1:2 0/0:1:2 0/0:1:2 0/1:2:5 0/0:0:1 0/0:1:5 0/0:1:4 0/0:1:2 0/0:1:2 ./. 0/0:1:2 0/0:1:4 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/0:1:2 0/0:1:2 +1 10020439 . G A,T 57.80 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AS_FilterStatus=SITE|SITE;AlleleBalance=0.73;DoC=156;FisherStrand=9.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=167.02;SB=-0.33;SpanningDeletions=0 GT:GQ:RD 1/0:2:6 0/0:2:5 ./. 0/0:1:3 0/0:1:2 ./. 0/0:1:3 ./. 0/0:2:5 0/0:1:2 0/0:3:11 0/0:1:2 ./. 0/0:1:2 1/0:3:4 0/0:0:1 0/0:2:7 ./. 0/0:1:4 0/0:1:4 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:3 ./. 0/0:1:2 ./. 1/0:2:8 0/0:1:4 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:2 0/0:1:5 0/0:1:5 0/0:0:1 0/0:1:3 0/0:2:6 0/0:0:1 1/0:3:3 0/0:0:1 0/0:1:2 0/0:0:1 1/0:2:6 0/0:0:1 0/0:0:1 0/0:1:4 2/2:0:2 0/0:1:2 ./. 0/0:1:2 0/0:1:3 2/2:0:2 1/0:2:4 0/0:1:4 0/0:0:1 0/0:1:2 0/0:1:2 +1 10020447 . C T 68.03 PASS AF=0.06;AFrange=0.01-0.14,95%;AS_FilterStatus=SITE;AlleleBalance=0.72;DoC=140;FisherStrand=9.4;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=166.27;SB=-0.62;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:1:2 ./. 0/0:1:3 1/1:0:1 ./. 0/0:0:3 ./. 0/0:2:5 0/0:1:2 0/0:2:7 0/0:1:2 ./. 0/0:1:2 0/1:3:4 0/0:0:1 0/0:2:7 ./. 0/0:1:3 0/0:1:4 ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:0:1 ./. 
0/0:1:2 0/0:0:1 0/1:2:7 0/0:1:5 0/0:1:4 0/0:1:2 0/0:1:2 ./. 0/0:2:5 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 ./. 0/1:2:3 0/0:0:1 0/0:1:2 0/0:0:1 0/1:2:6 0/0:1:2 0/0:0:1 0/0:2:5 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/1:3:4 0/0:1:2 0/0:1:3 0/0:1:4 0/0:1:2 +1 10020452 . T C 32.71 PASS AF=0.05;AFrange=0.01-0.12,95%;AS_FilterStatus=SITE;AlleleBalance=0.70;DoC=138;FisherStrand=9.0;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=167.20;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 1/0:1:5 1/0:1:3 0/0:0:1 0/0:1:3 ./. ./. 0/0:1:3 ./. 0/0:1:4 0/0:1:2 0/0:2:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:2:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:0:2 0/0:0:1 1/0:1:7 0/0:0:3 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:2:5 0/0:0:1 1/0:3:3 0/0:0:1 0/0:1:2 0/0:0:1 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:1:5 ./. +1 10020453 . G A,T 48.53 PASS AF=0.05,0.00;AFrange=0.01-0.12,95%;AS_FilterStatus=SITE|SITE;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. ./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. +1 10020464 . G T 74.83 PASS AF=0.06;AFrange=0.01-0.13,95%;AS_FilterStatus=SITE;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 
0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 +1 10020470 . A G,T 91.66 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AS_FilterStatus=SITE|SITE;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 +1 10020484 . AG A,TG 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AS_FilterStatus=foo|SITE;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020485 . G A,T 32.66 foo AF=0.03,0.00;AFrange=0.01-0.08,95%;AS_FilterStatus=foo|foo;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 
0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 +1 10020492 . T A,G 44.35 PASS AF=0.03,0.00;AFrange=0.01-0.09,95%;AS_FilterStatus=SITE|SITE;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 +1 10020615 . C T,A 162.10 foo AF=0.04,0.00;AFrange=0.01-0.10,95%;AS_FilterStatus=foo|foo;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 
0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/vcfexample2.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/vcfexample2.vcf index bfd79da45e6..9940c2db6ae 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/vcfexample2.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/vcfexample2.vcf @@ -27,7 +27,7 @@ 1 10020453 . G A,T 48.53 . AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:RD:GQ 1/0:5:2.22 0/0:3:0.87 1/1:1:0.01 0/0:3:0.89 ./. ./. 0/0:2:0.60 ./. 0/0:3:0.90 0/0:2:0.60 0/0:6:1.46 0/0:2:0.60 0/0:1:0.30 0/0:3:0.90 1/0:4:2.52 0/0:1:0.30 0/0:8:1.71 ./. 0/0:3:0.87 0/0:3:0.90 ./. ./. 0/0:1:0.30 0/0:3:0.90 0/0:1:0.30 0/0:2:0.60 0/0:1:0.15 0/0:2:0.59 0/0:2:0.30 1/0:7:1.72 0/2:2:0.01 0/0:4:1.20 0/0:1:0.30 0/0:2:0.60 ./. 0/0:3:0.76 0/0:5:1.50 0/0:1:0.30 0/0:3:0.90 0/0:5:1.40 0/0:1:0.30 1/0:3:2.24 0/0:1:0.30 0/0:1:0.30 ./. 1/0:6:1.92 0/0:1:0.30 0/0:2:0.58 0/0:3:0.90 0/0:1:0.30 0/0:2:0.60 ./. ./. 0/0:2:0.60 0/0:1:0.30 0/0:4:1.20 0/0:2:0.60 0/0:3:0.90 0/0:5:1.50 ./. 1 10020464 . G T 74.83 . AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:RD:GQ 0/1:5:2.63 0/0:6:1.80 0/0:1:0.30 0/0:4:1.20 ./. ./. ./. ./. 0/0:3:0.90 0/0:1:0.30 0/0:3:0.90 0/0:4:1.20 0/0:1:0.30 0/0:3:0.90 0/1:4:2.93 0/0:2:0.60 0/0:9:0.25 ./. 0/0:3:0.87 0/0:2:0.60 0/0:1:0.30 ./. 0/0:2:0.60 0/0:3:0.90 0/0:1:0.30 0/0:1:0.30 0/0:2:0.60 0/0:2:0.60 0/0:3:0.90 0/1:5:2.63 0/0:5:1.50 0/0:5:1.50 0/0:3:0.90 0/0:2:0.60 ./. 
0/0:1:0.30 0/0:4:1.20 0/0:2:0.59 0/0:4:1.20 0/0:3:0.90 0/0:1:0.27 0/1:4:4.81 0/0:4:1.20 0/0:1:0.30 ./. 0/1:7:2.23 0/0:1:0.30 0/0:7:1.54 0/1:4:1.23 0/0:1:0.30 0/0:1:0.30 ./. 0/0:2:0.30 0/0:2:0.60 0/0:2:0.30 0/0:3:0.90 0/0:1:0.30 0/0:2:0.60 0/0:7:2.06 0/0:2:0.60 1 10020470 . A G,T 91.66 . AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:RD:GQ 0/1:5:1.74 0/0:6:1.80 0/0:1:0.30 0/0:4:1.20 2/2:1:0.00 ./. ./. ./. 0/0:1:0.29 0/0:1:0.24 0/0:2:0.60 0/0:4:1.20 0/0:1:0.30 0/0:4:1.20 0/1:4:2.24 0/0:2:0.60 0/0:9:2.69 0/0:2:0.60 0/0:3:0.88 0/0:1:0.30 0/0:1:0.30 0/0:1:0.30 0/0:3:0.60 0/0:4:1.20 0/0:1:0.30 0/0:1:0.30 0/0:3:0.90 0/0:3:0.60 0/0:6:1.46 0/1:4:2.24 0/1:5:1.66 0/0:9:2.70 0/0:4:1.20 0/0:2:0.60 0/0:2:0.60 0/0:2:0.60 0/0:5:1.49 0/0:3:0.90 0/0:4:1.20 0/0:4:1.20 0/0:1:0.30 0/1:4:5.78 0/0:6:1.80 0/0:2:0.56 ./. 0/1:6:4.98 0/0:1:0.30 0/0:9:2.29 0/0:4:1.20 0/0:1:0.29 0/0:1:0.30 ./. 0/0:8:1.54 0/0:2:0.60 0/0:1:0.30 0/1:3:2.54 0/0:3:0.90 0/0:3:0.60 0/0:7:2.10 0/0:2:0.60 -1 10020484 . A C,T 55.89 . AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:RD:GQ 0/1:7:2.18 0/0:8:2.40 0/0:3:0.85 0/0:4:1.20 0/0:3:0.90 ./. ./. ./. 0/0:4:1.13 0/0:1:0.30 0/0:5:1.50 0/0:4:1.20 0/0:1:0.30 0/0:4:1.20 0/1:4:2.76 0/0:3:0.90 0/0:16:3.57 0/0:3:0.60 0/0:3:0.90 ./. 0/0:1:0.30 0/0:2:0.60 0/0:3:0.84 0/0:6:1.43 0/0:1:0.30 0/2:5:1.51 0/0:5:1.35 0/0:3:0.90 0/0:5:1.39 0/1:7:1.85 0/0:8:1.50 0/0:10:2.99 0/0:4:1.20 ./. 0/0:4:1.12 0/0:4:1.19 0/0:6:1.80 0/0:5:1.20 0/0:4:1.20 0/0:5:1.40 ./. 0/1:4:6.53 0/0:7:2.10 0/0:5:0.59 ./. 0/1:5:2.65 0/0:1:0.30 0/0:7:2.09 0/0:4:1.20 0/0:3:0.19 0/0:2:0.60 0/0:2:0.30 0/0:10:2.70 0/0:2:0.60 0/0:2:0.30 0/0:4:1.20 0/0:5:1.49 0/0:4:0.98 0/0:6:1.80 0/0:5:0.90 +1 10020484 . AG A,TG 55.89 . 
AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:RD:GQ 0/1:7:2.18 0/0:8:2.40 0/0:3:0.85 0/0:4:1.20 0/0:3:0.90 ./. ./. ./. 0/0:4:1.13 0/0:1:0.30 0/0:5:1.50 0/0:4:1.20 0/0:1:0.30 0/0:4:1.20 0/1:4:2.76 0/0:3:0.90 0/0:16:3.57 0/0:3:0.60 0/0:3:0.90 ./. 0/0:1:0.30 0/0:2:0.60 0/0:3:0.84 0/0:6:1.43 0/0:1:0.30 0/2:5:1.51 0/0:5:1.35 0/0:3:0.90 0/0:5:1.39 0/1:7:1.85 0/0:8:1.50 0/0:10:2.99 0/0:4:1.20 ./. 0/0:4:1.12 0/0:4:1.19 0/0:6:1.80 0/0:5:1.20 0/0:4:1.20 0/0:5:1.40 ./. 0/1:4:6.53 0/0:7:2.10 0/0:5:0.59 ./. 0/1:5:2.65 0/0:1:0.30 0/0:7:2.09 0/0:4:1.20 0/0:3:0.19 0/0:2:0.60 0/0:2:0.30 0/0:10:2.70 0/0:2:0.60 0/0:2:0.30 0/0:4:1.20 0/0:5:1.49 0/0:4:0.98 0/0:6:1.80 0/0:5:0.90 1 10020485 . G A,T 32.66 . AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:RD:GQ 1/0:7:1.04 0/0:8:1.80 0/0:3:0.87 0/0:4:1.20 0/0:3:0.90 ./. ./. ./. 0/0:4:1.20 0/0:1:0.30 0/0:5:1.50 0/0:4:1.16 0/0:1:0.30 0/0:4:1.20 1/0:4:2.52 0/0:3:0.90 0/0:16:4.21 0/0:3:0.90 0/0:3:0.90 ./. 0/0:1:0.30 0/0:2:0.60 0/0:3:0.90 0/0:6:1.70 0/0:1:0.30 0/0:5:1.41 0/0:5:1.31 0/0:3:0.90 0/0:5:0.19 1/0:7:1.52 0/0:6:1.71 0/0:10:2.94 0/0:4:1.15 ./. 0/0:4:1.12 0/0:4:1.19 0/0:6:1.80 0/0:5:1.49 0/0:4:1.20 0/0:5:1.50 ./. 1/0:4:5.01 0/0:7:2.11 0/0:6:1.78 ./. 1/0:5:2.22 0/0:1:0.30 0/0:7:1.21 0/0:4:1.19 0/0:3:0.41 0/0:2:0.60 0/0:2:0.60 0/0:10:3.00 0/0:2:0.60 0/0:2:0.60 0/0:4:1.20 0/0:5:0.71 0/0:4:1.02 0/0:6:1.80 0/2:4:1.73 1 10020492 . T A,G 44.35 . AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:RD:GQ 1/0:8:1.35 0/0:7:2.10 0/0:5:1.46 0/0:4:1.20 0/0:4:1.20 ./. 
0/0:2:0.60 0/0:1:0.30 0/0:5:0.90 0/0:1:0.30 0/0:10:3.01 0/0:5:1.50 2/0:2:0.79 0/0:4:1.17 1/0:4:2.65 0/0:3:0.90 0/0:16:4.75 0/0:3:0.60 0/0:3:0.90 0/0:1:0.29 0/0:3:0.90 1/0:4:0.68 1/0:2:0.38 0/0:6:1.50 0/0:1:0.30 0/0:11:2.12 0/0:4:0.90 0/0:4:1.20 0/0:11:2.10 1/0:8:1.36 0/0:11:2.44 0/0:10:2.95 0/0:4:1.19 0/0:1:0.30 0/0:4:1.12 0/0:6:1.80 0/0:5:1.48 0/0:4:1.20 0/0:4:1.20 0/0:2:0.60 ./. 1/0:3:2.46 0/0:7:2.11 0/0:6:1.78 0/0:1:0.28 1/0:4:2.56 0/0:1:0.30 0/0:8:2.00 0/0:6:1.80 0/0:3:0.90 0/0:2:0.60 0/0:2:0.60 0/0:9:2.40 0/0:2:0.60 0/0:5:0.90 0/0:5:1.50 0/0:6:1.49 0/0:5:1.42 0/0:7:2.10 0/0:9:2.41 1 10020615 . C T,A 162.10 . AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:RD:GQ 0/0:5:1.50 0/0:10:2.67 0/0:6:1.80 0/0:5:1.50 0/0:4:1.17 ./. 0/0:6:1.81 1/1:1:0.00 0/0:8:2.39 0/0:8:2.34 0/0:22:6.26 0/0:1:0.15 0/0:3:0.90 0/0:2:0.60 2/2:3:0.90 0/0:3:0.90 2/0:11:6.64 0/0:2:0.60 0/0:6:1.62 0/0:3:0.77 0/0:3:0.90 0/0:3:0.90 0/0:7:2.06 0/0:6:1.79 ./. 0/0:10:2.97 0/0:4:1.20 0/0:4:0.51 0/0:14:3.83 0/0:1:0.30 2/0:11:4.92 0/0:5:1.45 0/0:3:0.86 0/0:1:0.30 0/0:2:0.60 0/0:6:1.80 0/0:2:0.60 0/0:3:0.90 0/0:4:1.20 2/0:5:5.46 0/0:5:1.49 0/0:2:0.55 2/0:8:2.15 0/0:5:1.49 0/0:5:1.41 0/0:3:0.90 ./. 
0/0:4:0.91 0/0:3:0.90 0/0:10:2.71 0/0:1:0.30 0/0:3:0.90 0/0:1:0.30 0/0:4:1.19 0/0:9:2.70 0/0:3:0.90 0/0:4:0.87 0/0:2:0.30 0/0:6:1.79 0/0:4:1.20 From a9ab22c164e3231fc73704b933726833c93d08c4 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 23 Mar 2020 12:38:44 -0400 Subject: [PATCH 66/85] final mods --- .../walkers/filters/VariantFiltration.java | 24 +++++++------------ .../mutect/filtering/AlleleFilterUtils.java | 20 ++++++++++------ 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java index 09df78431f7..9983095f79a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java @@ -13,7 +13,6 @@ import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.AlleleFilterUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import picard.cmdline.programgroups.VariantFilteringProgramGroup; import org.broadinstitute.hellbender.engine.*; import org.broadinstitute.hellbender.exceptions.UserException; @@ -345,24 +344,15 @@ public void onTraversalStart() { @Override public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) { if (applyForAllele) { - // TODO either put allele specific filters in filter or do a different merge so we get all the other flags processed correctly too - List filtered = splitMultiAllelics(variant).stream().map(vc -> filter(internalApply(vc, readsContext, ref, new FeatureContext(featureContext, new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd()))), 
featureContext)).collect(Collectors.toList()); - // now we need to add individual alleles - // get allele filters list + List filtered = splitMultiAllelics(variant).stream().map(vc -> filter(vc, new FeatureContext(featureContext, new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd())))).collect(Collectors.toList()); + // get filters for each allele List> alleleFilters = filtered.stream().map(filteredvc -> filteredvc.getFilters()).collect(Collectors.toList()); - // convert List> to List> and encode - VariantContext filteredVC = AlleleFilterUtils.addAlleleFilters(variant, alleleFilters, invalidatePreviousFilters); + // add in the AS_FilterStatus and set the variant filters + VariantContext filteredVC = AlleleFilterUtils.addAlleleAndComputeSiteFilters(variant, alleleFilters); writer.add(filteredVC); } else { - writer.add(filter(internalApply(variant, readsContext, ref, featureContext), featureContext)); + writer.add(filter(variant, featureContext)); } - - } - - // TODO change the name of this method - protected VariantContext internalApply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) { - final VariantContext vc1 = invalidatePreviousFilters ? (new VariantContextBuilder(variant)).unfiltered().make() : variant; - return isMaskFilterPresent(vc1) ? vc1: addMaskIfCoversVariant(vc1, featureContext); } private List splitMultiAllelics(VariantContext vc) { @@ -394,7 +384,9 @@ private boolean isMaskFilterPresent(final VariantContext vc) { return vc.getFilters() != null && vc.getFilters().contains(maskName); } - private VariantContext filter(final VariantContext vc, final FeatureContext featureContext) { + private VariantContext filter(final VariantContext variant, final FeatureContext featureContext) { + final VariantContext vcModFilters = invalidatePreviousFilters ? 
(new VariantContextBuilder(variant)).unfiltered().make() : variant; + final VariantContext vc = isMaskFilterPresent(vcModFilters) ? vcModFilters: addMaskIfCoversVariant(vcModFilters, featureContext); final VariantContextBuilder builder = new VariantContextBuilder(vc); // make new Genotypes based on filters diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java index 1d9cd78d98c..cb7814beda0 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java @@ -12,6 +12,9 @@ import java.util.*; import java.util.stream.Collectors; +/** + * Helps read and set allele specific filters + */ public class AlleleFilterUtils { public static List> decodeASFilters(VariantContext vc) { @@ -50,8 +53,14 @@ public static List addFilter(List currentFilters, String newFilt } } - public static VariantContext addAlleleFilters(VariantContext vc, List> alleleFilters, boolean invalidatePreviousFilters) { - // TODO: should invalidatePreviousFilters apply to allele filters? probably TBD + /** + * Sets the filters for each allele and calculates the intersection of the allele filters to set on the variant. + * PASS if the intersection is empty. + * @param vc The variant context to build from, however it assumes all relevant filters are set in the alleleFilters collection + * @param alleleFilters filters to be applied to each allele, the intersection of these filters are applied at the site level + * @return The updated variant context + */ + public static VariantContext addAlleleAndComputeSiteFilters(VariantContext vc, List> alleleFilters) { String encodedFilters = AlleleFilterUtils.encodeASFilters(alleleFilters.stream().map( af -> af.isEmpty() ? 
Collections.singletonList(GATKVCFConstants.SITE_LEVEL_FILTERS) : af.stream().collect(Collectors.toList())).collect(Collectors.toList())); VariantContextBuilder vcb = new VariantContextBuilder(vc).attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, encodedFilters); @@ -59,13 +68,10 @@ public static VariantContext addAlleleFilters(VariantContext vc, List siteFilters = alleleFilters.stream().skip(1) .collect(()->new HashSet<>(alleleFilters.get(0)), Set::retainAll, Set::retainAll); - if (invalidatePreviousFilters && !siteFilters.isEmpty()) { + if (!siteFilters.isEmpty()) { vcb.filters(siteFilters); - } else if (siteFilters.isEmpty() && vcb.getFilters() == null) { - vcb.passFilters(); } else { - // either siteFilters is not empty or vbc filter is not empty - siteFilters.forEach(filter -> vcb.filter(filter)); + vcb.passFilters(); } return vcb.make(); } From 3076777e9e365eb3ad65026f217d569ac8647ab4 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 23 Mar 2020 13:17:09 -0400 Subject: [PATCH 67/85] minor change --- .../tools/walkers/filters/VariantFiltrationIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java index fee71c6e677..fd391d450f8 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java @@ -48,7 +48,7 @@ public Object[][] masks() { {"foo", "--mask " + getToolTestDataDir() + "vcfexample2.vcf", "testVariantFiltration_testMask1.vcf"}, {"foo", "--mask " + new File(getToolTestDataDir() + "vcfMask.vcf").getAbsolutePath(), "testVariantFiltration_testMask2.vcf"}, {"foo", "--" + VariantFiltration.MASK_EXTENSION_LONG_NAME + " 10 --mask:VCF " + getToolTestDataDir() 
+ "vcfMask.vcf", "testVariantFiltration_testMask3.vcf"}, - {"foo", "--apply-allele-specific-filters true --mask " + new File(getToolTestDataDir() + "vcfMask.vcf").getAbsolutePath(), "testVariantFiltration_testMask4.vcf"} + {"foo", "--apply-allele-specific-filters --mask " + new File(getToolTestDataDir() + "vcfMask.vcf").getAbsolutePath(), "testVariantFiltration_testMask4.vcf"} }; } From 10a4ba7ead50036126e9a0b9bf75555ff09ac0a1 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 24 Mar 2020 10:09:52 -0400 Subject: [PATCH 68/85] fix expected files --- .../expected/testVariantFiltration_testClusteredSnps.vcf | 6 +++--- .../expected/testVariantFiltration_testFilter1.vcf | 2 +- .../expected/testVariantFiltration_testFilter2.vcf | 2 +- .../testVariantFiltration_testFilterWithSeparateNames.vcf | 2 +- .../expected/testVariantFiltration_testGenotypeFilters1.vcf | 2 +- .../expected/testVariantFiltration_testGenotypeFilters2.vcf | 2 +- .../expected/testVariantFiltration_testInvertFilter.vcf | 2 +- .../expected/testVariantFiltration_testInvertJexlFilter.vcf | 2 +- .../expected/testVariantFiltration_testMaskReversed.vcf | 2 +- .../expected/testVariantFiltration_testNoAction.vcf | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testClusteredSnps.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testClusteredSnps.vcf index fff412e3f9b..02fe388ef22 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testClusteredSnps.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testClusteredSnps.vcf @@ -28,7 +28,7 @@ 1 10020453 . 
G A,T 48.53 SnpCluster AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. ./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . G T 74.83 PASS AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . 
A C,T 55.89 SnpCluster AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 -1 10020485 . G A,T 32.66 SnpCluster AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 -1 10020492 . T A,G 44.35 SnpCluster AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 
1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 +1 10020484 . AG A,TG 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020485 . G A,T 32.66 PASS AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 +1 10020492 . T A,G 44.35 PASS AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 
0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 PASS AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilter1.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilter1.vcf index 789cb5ddb67..50a95410e3a 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilter1.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilter1.vcf @@ -28,7 +28,7 @@ 1 10020453 . G A,T 48.53 PASS AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. 
./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . G T 74.83 foo AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . A C,T 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 
0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . AG A,TG 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . G A,T 32.66 PASS AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 PASS AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 
0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 PASS AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilter2.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilter2.vcf index 9330cb5c0d6..9428f6ea6f1 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilter2.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilter2.vcf @@ -28,7 +28,7 @@ 1 10020453 . G A,T 48.53 PASS AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. 
./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . G T 74.83 PASS AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . A C,T 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 
0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . AG A,TG 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . G A,T 32.66 PASS AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 bar AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 
0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 PASS AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilterWithSeparateNames.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilterWithSeparateNames.vcf index cf1f0a020b9..b32e6f2fb7f 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilterWithSeparateNames.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testFilterWithSeparateNames.vcf @@ -29,7 +29,7 @@ 1 10020453 . 
G A,T 48.53 PASS AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. ./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . G T 74.83 PASS AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . 
A C,T 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . AG A,TG 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . G A,T 32.66 PASS AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 
1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 ABF;FSF AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 PASS AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 
0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testGenotypeFilters1.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testGenotypeFilters1.vcf index 1686297ff81..f3f7b0cf981 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testGenotypeFilters1.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testGenotypeFilters1.vcf @@ -29,7 +29,7 @@ 1 10020453 . G A,T 48.53 PASS AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. ./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . G T 74.83 PASS AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 
0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . A C,T 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . AG A,TG 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 
0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . G A,T 32.66 PASS AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 PASS AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 PASS AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 
0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testGenotypeFilters2.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testGenotypeFilters2.vcf index 6b81226349e..e3d959a5163 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testGenotypeFilters2.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testGenotypeFilters2.vcf @@ -29,7 +29,7 @@ 1 10020453 . G A,T 48.53 PASS AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:FT:GQ:RD 1/0:PASS:2:5 0/0:PASS:1:3 1/1:foo:0:1 0/0:PASS:1:3 ./.:PASS ./.:PASS 0/0:PASS:1:2 ./.:PASS 0/0:PASS:1:3 0/0:PASS:1:2 0/0:PASS:1:6 0/0:PASS:1:2 0/0:PASS:0:1 0/0:PASS:1:3 1/0:PASS:3:4 0/0:PASS:0:1 0/0:PASS:2:8 ./.:PASS 0/0:PASS:1:3 0/0:PASS:1:3 ./.:PASS ./.:PASS 0/0:PASS:0:1 0/0:PASS:1:3 0/0:PASS:0:1 0/0:PASS:1:2 0/0:PASS:0:1 0/0:PASS:1:2 0/0:PASS:0:2 1/0:PASS:2:7 0/2:PASS:0:2 0/0:PASS:1:4 0/0:PASS:0:1 0/0:PASS:1:2 ./.:PASS 0/0:PASS:1:3 0/0:PASS:2:5 0/0:PASS:0:1 0/0:PASS:1:3 0/0:PASS:1:5 0/0:PASS:0:1 1/0:PASS:2:3 0/0:PASS:0:1 0/0:PASS:0:1 ./.:PASS 1/0:PASS:2:6 0/0:PASS:0:1 0/0:PASS:1:2 0/0:PASS:1:3 0/0:PASS:0:1 0/0:PASS:1:2 ./.:PASS ./.:PASS 0/0:PASS:1:2 0/0:PASS:0:1 0/0:PASS:1:4 0/0:PASS:1:2 0/0:PASS:1:3 0/0:PASS:2:5 ./.:PASS 1 10020464 . 
G T 74.83 PASS AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:FT:GQ:RD 0/1:PASS:2:5 0/0:PASS:2:6 0/0:PASS:0:1 0/0:PASS:1:4 2/2:foo:0:1 ./.:PASS ./.:PASS ./.:PASS 0/0:PASS:0:1 0/0:PASS:0:1 0/0:PASS:1:2 0/0:PASS:1:4 0/0:PASS:0:1 0/0:PASS:1:4 0/1:PASS:2:4 0/0:PASS:1:2 0/0:PASS:3:9 0/0:PASS:1:2 0/0:PASS:1:3 0/0:PASS:0:1 0/0:PASS:0:1 0/0:PASS:0:1 0/0:PASS:1:3 0/0:PASS:1:4 0/0:PASS:0:1 0/0:PASS:0:1 0/0:PASS:1:3 0/0:PASS:1:3 0/0:PASS:1:6 0/1:PASS:2:4 0/1:PASS:2:5 0/0:PASS:3:9 0/0:PASS:1:4 0/0:PASS:1:2 0/0:PASS:1:2 0/0:PASS:1:2 0/0:PASS:1:5 0/0:PASS:1:3 0/0:PASS:1:4 0/0:PASS:1:4 0/0:PASS:0:1 0/1:PASS:6:4 0/0:PASS:2:6 0/0:PASS:1:2 ./.:PASS 0/1:PASS:5:6 0/0:PASS:0:1 0/0:PASS:2:9 0/0:PASS:1:4 0/0:PASS:0:1 0/0:PASS:0:1 ./.:PASS 0/0:PASS:2:8 0/0:PASS:1:2 0/0:PASS:0:1 0/1:PASS:3:3 0/0:PASS:1:3 0/0:PASS:1:3 0/0:PASS:2:7 0/0:PASS:1:2 -1 10020484 . A C,T 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 
0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . AG A,TG 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . G A,T 32.66 PASS AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 PASS AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 
0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 PASS AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:FT:GQ:RD 0/0:PASS:2:5 0/0:PASS:3:10 0/0:PASS:2:6 0/0:PASS:2:5 0/0:PASS:1:4 ./.:PASS 0/0:PASS:2:6 1/1:foo:0:1 0/0:PASS:2:8 0/0:PASS:2:8 0/0:PASS:6:22 0/0:PASS:0:1 0/0:PASS:1:3 0/0:PASS:1:2 2/2:foo:1:3 0/0:PASS:1:3 2/0:PASS:7:11 0/0:PASS:1:2 0/0:PASS:2:6 0/0:PASS:1:3 0/0:PASS:1:3 0/0:PASS:1:3 0/0:PASS:2:7 0/0:PASS:2:6 ./.:PASS 0/0:PASS:3:10 0/0:PASS:1:4 0/0:PASS:1:4 0/0:PASS:4:14 0/0:PASS:0:1 2/0:PASS:5:11 0/0:PASS:1:5 0/0:PASS:1:3 0/0:PASS:0:1 0/0:PASS:1:2 0/0:PASS:2:6 0/0:PASS:1:2 0/0:PASS:1:3 0/0:PASS:1:4 2/0:PASS:5:5 0/0:PASS:1:5 0/0:PASS:1:2 2/0:PASS:2:8 0/0:PASS:1:5 0/0:PASS:1:5 0/0:PASS:1:3 ./.:PASS 0/0:PASS:1:4 0/0:PASS:1:3 0/0:PASS:3:10 0/0:PASS:0:1 0/0:PASS:1:3 0/0:PASS:0:1 0/0:PASS:1:4 0/0:PASS:3:9 0/0:PASS:1:3 0/0:PASS:1:4 0/0:PASS:0:2 0/0:PASS:2:6 0/0:PASS:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testInvertFilter.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testInvertFilter.vcf index b5de375b71e..74ba3a999fb 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testInvertFilter.vcf +++ 
b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testInvertFilter.vcf @@ -29,7 +29,7 @@ 1 10020453 . G A,T 48.53 ABF;FSF AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. ./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . G T 74.83 ABF;FSF AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 ABF;FSF AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 
0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . A C,T 55.89 ABF;FSF AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . AG A,TG 55.89 ABF;FSF AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . 
G A,T 32.66 ABF;FSF AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 PASS AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 ABF;FSF AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 
0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testInvertJexlFilter.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testInvertJexlFilter.vcf index bde54652f01..89d7925175b 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testInvertJexlFilter.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testInvertJexlFilter.vcf @@ -29,7 +29,7 @@ 1 10020453 . G A,T 48.53 ABF;FSF AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. ./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . G T 74.83 ABF;FSF AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 
0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 ABF;FSF AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . A C,T 55.89 ABF;FSF AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . AG A,TG 55.89 ABF;FSF AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 
0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . G A,T 32.66 ABF;FSF AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 PASS AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 ABF;FSF AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 
0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskReversed.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskReversed.vcf index 7daf9fedb31..2554aa891f9 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskReversed.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskReversed.vcf @@ -28,7 +28,7 @@ 1 10020453 . G A,T 48.53 PASS AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. ./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . G T 74.83 PASS AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 
0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . A C,T 55.89 outsideGoodSites AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . AG A,TG 55.89 outsideGoodSites AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 
0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . G A,T 32.66 outsideGoodSites AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 outsideGoodSites AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . 
C T,A 162.10 outsideGoodSites AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testNoAction.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testNoAction.vcf index 0087a84e7d5..6e04b65a4fb 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testNoAction.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testNoAction.vcf @@ -27,7 +27,7 @@ 1 10020453 . G A,T 48.53 PASS AF=0.05,0.00;AFrange=0.01-0.12,95%;AlleleBalance=0.70;DoC=133;FisherStrand=10.8;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.97;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:2:5 0/0:1:3 1/1:0:1 0/0:1:3 ./. ./. 0/0:1:2 ./. 0/0:1:3 0/0:1:2 0/0:1:6 0/0:1:2 0/0:0:1 0/0:1:3 1/0:3:4 0/0:0:1 0/0:2:8 ./. 0/0:1:3 0/0:1:3 ./. ./. 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:2 0/0:0:1 0/0:1:2 0/0:0:2 1/0:2:7 0/2:0:2 0/0:1:4 0/0:0:1 0/0:1:2 ./. 0/0:1:3 0/0:2:5 0/0:0:1 0/0:1:3 0/0:1:5 0/0:0:1 1/0:2:3 0/0:0:1 0/0:0:1 ./. 1/0:2:6 0/0:0:1 0/0:1:2 0/0:1:3 0/0:0:1 0/0:1:2 ./. ./. 0/0:1:2 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:3 0/0:2:5 ./. 1 10020464 . 
G T 74.83 PASS AF=0.06;AFrange=0.01-0.13,95%;AlleleBalance=0.74;DoC=152;FisherStrand=20.7;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 0/1:3:5 0/0:2:6 0/0:0:1 0/0:1:4 ./. ./. ./. ./. 0/0:1:3 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:1:3 0/1:3:4 0/0:1:2 0/0:0:9 ./. 0/0:1:3 0/0:1:2 0/0:0:1 ./. 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:2 0/0:1:3 0/1:3:5 0/0:2:5 0/0:2:5 0/0:1:3 0/0:1:2 ./. 0/0:0:1 0/0:1:4 0/0:1:2 0/0:1:4 0/0:1:3 0/0:0:1 0/1:5:4 0/0:1:4 0/0:0:1 ./. 0/1:2:7 0/0:0:1 0/0:2:7 0/1:1:4 0/0:0:1 0/0:0:1 ./. 0/0:0:2 0/0:1:2 0/0:0:2 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:7 0/0:1:2 1 10020470 . A G,T 91.66 PASS AF=0.06,0.00;AFrange=0.01-0.13,95%;AlleleBalance=0.70;DoC=182;FisherStrand=13.9;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=0.99;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ:RD 0/1:2:5 0/0:2:6 0/0:0:1 0/0:1:4 2/2:0:1 ./. ./. ./. 0/0:0:1 0/0:0:1 0/0:1:2 0/0:1:4 0/0:0:1 0/0:1:4 0/1:2:4 0/0:1:2 0/0:3:9 0/0:1:2 0/0:1:3 0/0:0:1 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:4 0/0:0:1 0/0:0:1 0/0:1:3 0/0:1:3 0/0:1:6 0/1:2:4 0/1:2:5 0/0:3:9 0/0:1:4 0/0:1:2 0/0:1:2 0/0:1:2 0/0:1:5 0/0:1:3 0/0:1:4 0/0:1:4 0/0:0:1 0/1:6:4 0/0:2:6 0/0:1:2 ./. 0/1:5:6 0/0:0:1 0/0:2:9 0/0:1:4 0/0:0:1 0/0:0:1 ./. 0/0:2:8 0/0:1:2 0/0:0:1 0/1:3:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:1:2 -1 10020484 . A C,T 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 +1 10020484 . 
AG A,TG 55.89 PASS AF=0.04,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.76;DoC=239;FisherStrand=12.2;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.98;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ:RD 0/1:2:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 0/1:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:1:6 0/0:0:1 0/2:2:5 0/0:1:5 0/0:1:3 0/0:1:5 0/1:2:7 0/0:2:8 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:5 ./. 0/1:7:4 0/0:2:7 0/0:1:5 ./. 0/1:3:5 0/0:0:1 0/0:2:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:0:2 0/0:3:10 0/0:1:2 0/0:0:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/0:1:5 1 10020485 . G A,T 32.66 PASS AF=0.03,0.00;AFrange=0.01-0.08,95%;AlleleBalance=0.75;DoC=237;FisherStrand=12.3;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ:RD 1/0:1:7 0/0:2:8 0/0:1:3 0/0:1:4 0/0:1:3 ./. ./. ./. 0/0:1:4 0/0:0:1 0/0:2:5 0/0:1:4 0/0:0:1 0/0:1:4 1/0:3:4 0/0:1:3 0/0:4:16 0/0:1:3 0/0:1:3 ./. 0/0:0:1 0/0:1:2 0/0:1:3 0/0:2:6 0/0:0:1 0/0:1:5 0/0:1:5 0/0:1:3 0/0:0:5 1/0:2:7 0/0:2:6 0/0:3:10 0/0:1:4 ./. 0/0:1:4 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:2:5 ./. 1/0:5:4 0/0:2:7 0/0:2:6 ./. 1/0:2:5 0/0:0:1 0/0:1:7 0/0:1:4 0/0:0:3 0/0:1:2 0/0:1:2 0/0:3:10 0/0:1:2 0/0:1:2 0/0:1:4 0/0:1:5 0/0:1:4 0/0:2:6 0/2:2:4 1 10020492 . T A,G 44.35 PASS AF=0.03,0.00;AFrange=0.01-0.09,95%;AlleleBalance=0.68;DoC=284;FisherStrand=1.4;HomopolymerRun=1;MAPQ0=0;NS=60;OnOffGenotype=1.00;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ:RD 1/0:1:8 0/0:2:7 0/0:1:5 0/0:1:4 0/0:1:4 ./. 0/0:1:2 0/0:0:1 0/0:1:5 0/0:0:1 0/0:3:10 0/0:2:5 2/0:1:2 0/0:1:4 1/0:3:4 0/0:1:3 0/0:5:16 0/0:1:3 0/0:1:3 0/0:0:1 0/0:1:3 1/0:1:4 1/0:0:2 0/0:2:6 0/0:0:1 0/0:2:11 0/0:1:4 0/0:1:4 0/0:2:11 1/0:1:8 0/0:2:11 0/0:3:10 0/0:1:4 0/0:0:1 0/0:1:4 0/0:2:6 0/0:1:5 0/0:1:4 0/0:1:4 0/0:1:2 ./. 
1/0:2:3 0/0:2:7 0/0:2:6 0/0:0:1 1/0:3:4 0/0:0:1 0/0:2:8 0/0:2:6 0/0:1:3 0/0:1:2 0/0:1:2 0/0:2:9 0/0:1:2 0/0:1:5 0/0:2:5 0/0:1:6 0/0:1:5 0/0:2:7 0/0:2:9 1 10020615 . C T,A 162.10 PASS AF=0.04,0.00;AFrange=0.01-0.10,95%;AlleleBalance=0.72;DoC=285;FisherStrand=1.1;HomopolymerRun=0;MAPQ0=0;NS=60;OnOffGenotype=0.93;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ:RD 0/0:2:5 0/0:3:10 0/0:2:6 0/0:2:5 0/0:1:4 ./. 0/0:2:6 1/1:0:1 0/0:2:8 0/0:2:8 0/0:6:22 0/0:0:1 0/0:1:3 0/0:1:2 2/2:1:3 0/0:1:3 2/0:7:11 0/0:1:2 0/0:2:6 0/0:1:3 0/0:1:3 0/0:1:3 0/0:2:7 0/0:2:6 ./. 0/0:3:10 0/0:1:4 0/0:1:4 0/0:4:14 0/0:0:1 2/0:5:11 0/0:1:5 0/0:1:3 0/0:0:1 0/0:1:2 0/0:2:6 0/0:1:2 0/0:1:3 0/0:1:4 2/0:5:5 0/0:1:5 0/0:1:2 2/0:2:8 0/0:1:5 0/0:1:5 0/0:1:3 ./. 0/0:1:4 0/0:1:3 0/0:3:10 0/0:0:1 0/0:1:3 0/0:0:1 0/0:1:4 0/0:3:9 0/0:1:3 0/0:1:4 0/0:0:2 0/0:2:6 0/0:1:4 From 74fff72231f9e1094cd79010e9547da85d1db76e Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Wed, 25 Mar 2020 15:47:46 -0400 Subject: [PATCH 69/85] update to lastest wdl --- scripts/mitochondria_m2_wdl/AlignAndCall.wdl | 317 ++++++++++++++---- .../mitochondria_m2_wdl/AlignmentPipeline.wdl | 1 + .../MitochondriaPipeline.wdl | 62 +++- 3 files changed, 300 insertions(+), 80 deletions(-) diff --git a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl index bfa2c75a3d8..bee56955879 100644 --- a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl +++ b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl @@ -1,6 +1,6 @@ version 1.0 -import "AlignmentPipeline.wdl" as AlignAndMarkDuplicates +import "https://api.firecloud.org/ga4gh/v1/tools/mitochondria:AlignmentPipeline/versions/1/plain-WDL/descriptor" as AlignAndMarkDuplicates workflow AlignAndCall { meta { @@ -33,8 +33,6 @@ workflow AlignAndCall { File mt_shifted_bwt File mt_shifted_pac File mt_shifted_sa - File blacklisted_sites_shifted - File blacklisted_sites_shifted_index File shift_back_chain @@ -44,6 +42,8 @@ workflow AlignAndCall { Float? 
vaf_filter_threshold Float? f_score_beta Boolean compress_output_vcf + Float? verifyBamID + Int? max_low_het_sites # Read length used for optimization only. If this is too small CollectWgsMetrics might fail, but the results are not # affected by this number. Default is 151. @@ -96,15 +96,6 @@ workflow AlignAndCall { preemptible_tries = preemptible_tries } - call GetContamination { - input: - input_bam = AlignToMt.mt_aligned_bam, - input_bam_index = AlignToMt.mt_aligned_bai, - ref_fasta = mt_fasta, - ref_fasta_index = mt_fasta_index, - preemptible_tries = preemptible_tries - } - Int? M2_mem = if CollectWgsMetrics.mean_coverage > 25000 then 14 else 7 call M2 as CallMt { @@ -156,11 +147,53 @@ workflow AlignAndCall { preemptible_tries = preemptible_tries } - call Filter { + call Filter as InitialFilter { + input: + raw_vcf = LiftoverAndCombineVcfs.merged_vcf, + raw_vcf_index = LiftoverAndCombineVcfs.merged_vcf_index, + raw_vcf_stats = MergeStats.stats, + sample_name = sample_name, + ref_fasta = mt_fasta, + ref_fai = mt_fasta_index, + ref_dict = mt_dict, + compress = compress_output_vcf, + gatk_override = gatk_override, + m2_extra_filtering_args = m2_filter_extra_args, + max_alt_allele_count = 4, + vaf_filter_threshold = 0, + blacklisted_sites = blacklisted_sites, + blacklisted_sites_index = blacklisted_sites_index, + f_score_beta = f_score_beta, + run_contamination = false, + preemptible_tries = preemptible_tries + } + + + call SplitMultiAllelicsAndRemoveNonPassSites { + input: + ref_fasta = mt_fasta, + ref_fai = mt_fasta_index, + ref_dict = mt_dict, + filtered_vcf = InitialFilter.filtered_vcf, + gatk_override = gatk_override + } + + call GetContamination { + input: + input_vcf = SplitMultiAllelicsAndRemoveNonPassSites.vcf_for_haplochecker, + preemptible_tries = preemptible_tries + } + + call Filter as FilterContamination { input: - raw_vcf = LiftoverAndCombineVcfs.final_vcf, - raw_vcf_index = LiftoverAndCombineVcfs.final_vcf_index, + raw_vcf = 
InitialFilter.filtered_vcf, + raw_vcf_index = InitialFilter.filtered_vcf_idx, raw_vcf_stats = MergeStats.stats, + run_contamination = true, + hasContamination = GetContamination.hasContamination, + contamination_major = GetContamination.major_level, + contamination_minor = GetContamination.minor_level, + verifyBamID = verifyBamID, sample_name = sample_name, ref_fasta = mt_fasta, ref_fai = mt_fasta_index, @@ -169,93 +202,101 @@ workflow AlignAndCall { gatk_override = gatk_override, m2_extra_filtering_args = m2_filter_extra_args, max_alt_allele_count = 4, - contamination = GetContamination.minor_level, - autosomal_coverage = autosomal_coverage, vaf_filter_threshold = vaf_filter_threshold, blacklisted_sites = blacklisted_sites, blacklisted_sites_index = blacklisted_sites_index, f_score_beta = f_score_beta, preemptible_tries = preemptible_tries + } + + if ( defined(autosomal_coverage) ) { + call FilterNuMTs { + input: + filtered_vcf = FilterContamination.filtered_vcf, + ref_fasta = mt_fasta, + ref_fai = mt_fasta_index, + ref_dict = mt_dict, + autosomal_coverage = autosomal_coverage, + gatk_override = gatk_override, + compress = compress_output_vcf, + preemptible_tries = preemptible_tries + } } + File low_het_vcf = select_first([FilterNuMTs.numt_filtered_vcf, FilterContamination.filtered_vcf]) + + call FilterLowHetSites { + input: + filtered_vcf = low_het_vcf, + ref_fasta = mt_fasta, + ref_fai = mt_fasta_index, + ref_dict = mt_dict, + max_low_het_sites = max_low_het_sites, + gatk_override = gatk_override, + compress = compress_output_vcf, + preemptible_tries = preemptible_tries + } + + output { File mt_aligned_bam = AlignToMt.mt_aligned_bam File mt_aligned_bai = AlignToMt.mt_aligned_bai File mt_aligned_shifted_bam = AlignToShiftedMt.mt_aligned_bam File mt_aligned_shifted_bai = AlignToShiftedMt.mt_aligned_bai - File out_vcf = Filter.filtered_vcf - File out_vcf_index = Filter.filtered_vcf_idx + File out_vcf = FilterLowHetSites.final_filtered_vcf + File out_vcf_index = 
FilterLowHetSites.final_filtered_vcf_idx + File input_vcf_for_haplochecker = SplitMultiAllelicsAndRemoveNonPassSites.vcf_for_haplochecker File duplicate_metrics = AlignToMt.duplicate_metrics File coverage_metrics = CollectWgsMetrics.metrics File theoretical_sensitivity_metrics = CollectWgsMetrics.theoretical_sensitivity File contamination_metrics = GetContamination.contamination_file Int mean_coverage = CollectWgsMetrics.mean_coverage String major_haplogroup = GetContamination.major_hg - Float contamination = GetContamination.minor_level + Float contamination = FilterContamination.contamination } } + task GetContamination { input { - File input_bam - File input_bam_index - File ref_fasta - File ref_fasta_index - Int qual = 20 - Int map_qual = 30 - Float vaf = 0.01 + File input_vcf + # runtime + Int? preemptible_tries} - # runtime - Int? preemptible_tries - } - - String basename = basename(input_bam, ".bam") - Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") - Int disk_size = ceil(size(input_bam, "GB") + ref_size) + 20 + Int disk_size = ceil(size(input_vcf, "GB")) + 20 meta { - description: "Uses Haplochecker to estimate levels of contamination in mitochondria" + description: "Uses new Haplochecker to estimate levels of contamination in mitochondria" } parameter_meta { - input_bam: "Bam aligned to chrM" - ref_fasta: "chrM reference" + input_vcf: "Filtered and split multi-allelic sites VCF for mitochondria" } - command { + command <<< set -e - - java -jar /usr/mtdnaserver/mitolib.jar haplochecker \ - --in ~{input_bam} \ - --ref ~{ref_fasta} \ - --out haplochecker_out \ - --QUAL ~{qual} \ - --MAPQ ~{map_qual} \ - --VAF ~{vaf} - -python3 < output-noquotes + grep -v "SampleID" output-noquotes > output-data + awk '{print $2}' output-data > contamination.txt + awk '{print $6}' output-data > major_hg.txt + awk '{print $8}' output-data > minor_hg.txt + awk '{print $14}' output-data > mean_het_major.txt + awk '{print $15}' output-data > mean_het_minor.txt 
+ >>> runtime { preemptible: select_first([preemptible_tries, 5]) memory: "3 GB" disks: "local-disk " + disk_size + " HDD" - docker: "gatkworkflows/mtdnaserver:1.2" + docker: "us.gcr.io/broad-dsde-methods/haplochecker:haplochecker-0124" } output { - File contamination_file = "haplochecker_out/~{basename}.contamination.txt" + File contamination_file = "output-noquotes" + String hasContamination = read_string("contamination.txt") String major_hg = read_string("major_hg.txt") - Float major_level = read_float("major_level.txt") String minor_hg = read_string("minor_hg.txt") - Float minor_level = read_float("minor_level.txt") + Float major_level = read_float("mean_het_major.txt") + Float minor_level = read_float("mean_het_minor.txt") } } @@ -356,7 +397,7 @@ task LiftoverAndCombineVcfs { java -jar /usr/gitc/picard.jar MergeVcfs \ I=~{basename}.shifted_back.vcf \ I=~{vcf} \ - O=~{basename}.final.vcf + O=~{basename}.merged.vcf >>> runtime { disks: "local-disk " + disk_size + " HDD" @@ -367,8 +408,8 @@ task LiftoverAndCombineVcfs { output{ # rejected_vcf should always be empty File rejected_vcf = "~{basename}.rejected.vcf" - File final_vcf = "~{basename}.final.vcf" - File final_vcf_index = "~{basename}.final.vcf.idx" + File merged_vcf = "~{basename}.merged.vcf" + File merged_vcf_index = "~{basename}.merged.vcf.idx" } } @@ -404,7 +445,7 @@ task M2 { } parameter_meta { input_bam: "Aligned Bam" - gga_vcf: "VCF for force-calling mode" + gga_vcf: "VCF for genotype given alleles mode" } command <<< set -e @@ -417,7 +458,9 @@ task M2 { gatk --java-options "-Xmx~{command_mem}m" Mutect2 \ -R ~{ref_fasta} \ -I ~{input_bam} \ - ~{"--alleles " + gga_vcf} \ + ~{"--genotyping-mode GENOTYPE_GIVEN_ALLELES --alleles " + gga_vcf} \ + --read-filter MateOnSameContigOrNoMappedMateReadFilter \ + --read-filter MateUnmappedAndUnmappedReadFilter \ -O ~{output_vcf} \ ~{true='--bam-output bamout.bam' false='' make_bamout} \ ~{m2_extra_args} \ @@ -455,11 +498,16 @@ task Filter { String? 
m2_extra_filtering_args Int max_alt_allele_count - Float contamination Float? autosomal_coverage Float? vaf_filter_threshold Float? f_score_beta + Boolean run_contamination + String? hasContamination + Float? contamination_major + Float? contamination_minor + Float? verifyBamID + File blacklisted_sites File blacklisted_sites_index @@ -469,16 +517,17 @@ task Filter { Int? preemptible_tries } - String output_vcf = sample_name + if compress then ".vcf.gz" else ".vcf" + String output_vcf = sub(sample_name, "(0x20 | 0x9 | 0xD | 0xA)+", "_") + if compress then ".vcf.gz" else ".vcf" String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx" Float ref_size = size(ref_fasta, "GB") + size(ref_fai, "GB") Int disk_size = ceil(size(raw_vcf, "GB") + ref_size) + 20 + Float hc_contamination = if run_contamination && hasContamination == "YES" then (if contamination_major == 0.0 then contamination_minor else 1.0 - contamination_major) else 0.0 + Float max_contamination = if defined(verifyBamID) && verifyBamID > hc_contamination then verifyBamID else hc_contamination meta { description: "Mutect2 Filtering for calling Snps and Indels" } parameter_meta { - autosomal_coverage: "Median coverage of the autosomes for filtering potential polymorphic NuMT variants" vaf_filter_threshold: "Hard cutoff for minimum allele fraction. All sites with VAF less than this cutoff will be filtered." f_score_beta: "F-Score beta balances the filtering strategy between recall and precision. The relative weight of recall to precision." 
} @@ -497,13 +546,13 @@ task Filter { ~{m2_extra_filtering_args} \ --max-alt-allele-count ~{max_alt_allele_count} \ --mitochondria-mode \ - ~{"--autosomal-coverage " + autosomal_coverage} \ ~{"--min-allele-fraction " + vaf_filter_threshold} \ ~{"--f-score-beta " + f_score_beta} \ - --contamination-estimate ~{contamination} + ~{"--contamination-estimate " + max_contamination} gatk VariantFiltration -V filtered.vcf \ -O ~{output_vcf} \ + --apply-allele-specific-filters \ --mask ~{blacklisted_sites} \ --mask-name "blacklisted_site" @@ -518,6 +567,7 @@ task Filter { output { File filtered_vcf = "~{output_vcf}" File filtered_vcf_idx = "~{output_vcf_index}" + Float contamination = "~{hc_contamination}" } } @@ -546,3 +596,122 @@ task MergeStats { preemptible: select_first([preemptible_tries, 5]) } } + +task SplitMultiAllelicsAndRemoveNonPassSites { + input { + File ref_fasta + File ref_fai + File ref_dict + File filtered_vcf + Int? preemptible_tries + File? gatk_override + } + + command { + set -e + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override} + gatk LeftAlignAndTrimVariants \ + -R ~{ref_fasta} \ + -V ~{filtered_vcf} \ + -O split.vcf \ + --split-multi-allelics \ + --dont-trim-alleles \ + --keep-original-ac + + gatk SelectVariants \ + -V split.vcf \ + -O splitAndPassOnly.vcf \ + --exclude-filtered + + } + output { + File vcf_for_haplochecker = "splitAndPassOnly.vcf" + } + runtime { + docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + memory: "3 MB" + disks: "local-disk 20 HDD" + preemptible: select_first([preemptible_tries, 5]) + } +} + +task FilterNuMTs { + input { + File ref_fasta + File ref_fai + File ref_dict + File filtered_vcf + Float? autosomal_coverage + Int? preemptible_tries + File? 
gatk_override + Boolean compress + } + + String basename = basename(filtered_vcf, ".vcf") + String output_vcf = basename + ".numt" + if compress then ".vcf.gz" else ".vcf" + String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx" + + parameter_meta { + autosomal_coverage: "Median coverage of the autosomes for filtering potential polymorphic NuMT variants" + } + + command { + set -e + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override} + gatk NuMTFilterTool \ + -R ~{ref_fasta} \ + -V ~{filtered_vcf} \ + -O ~{output_vcf} \ + --autosomal-coverage ~{autosomal_coverage} + + } + output { + File numt_filtered_vcf = "~{output_vcf}" + File numt_filtered_vcf_idx = "~{output_vcf_index}" + } + runtime { + docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + memory: "3 MB" + disks: "local-disk 20 HDD" + preemptible: select_first([preemptible_tries, 5]) + } +} + +task FilterLowHetSites { + input { + File ref_fasta + File ref_fai + File ref_dict + File filtered_vcf + Int? max_low_het_sites + Int? preemptible_tries + File? 
gatk_override + Boolean compress + } + + String basename = basename(filtered_vcf, ".vcf") + String output_vcf = basename + ".final" + if compress then ".vcf.gz" else ".vcf" + String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx" + Int max_sites = select_first([max_low_het_sites, 3]) + + command { + set -e + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override} + gatk MTLowHeteroplasmyFilterTool \ + -R ~{ref_fasta} \ + -V ~{filtered_vcf} \ + -O ~{output_vcf} \ + --max-allowed-low-hets ~{max_sites} + + } + output { + File final_filtered_vcf = "~{output_vcf}" + File final_filtered_vcf_idx = "~{output_vcf_index}" + } + runtime { + docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + memory: "3 MB" + disks: "local-disk 20 HDD" + preemptible: select_first([preemptible_tries, 5]) + } +} \ No newline at end of file diff --git a/scripts/mitochondria_m2_wdl/AlignmentPipeline.wdl b/scripts/mitochondria_m2_wdl/AlignmentPipeline.wdl index f3cb3fe7a2e..bdcc586a682 100644 --- a/scripts/mitochondria_m2_wdl/AlignmentPipeline.wdl +++ b/scripts/mitochondria_m2_wdl/AlignmentPipeline.wdl @@ -165,6 +165,7 @@ task AlignAndMarkDuplicates { task GetBwaVersion { meta { description: "Gets version of BWA" + volatile: true } command { # not setting set -o pipefail here because /bwa has a rc=1 and we dont want to allow rc=1 to succeed because diff --git a/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl b/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl index d7b4ba66589..24ada3a72aa 100644 --- a/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl +++ b/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl @@ -1,11 +1,11 @@ version 1.0 -import "AlignAndCall.wdl" as AlignAndCall +import "https://api.firecloud.org/ga4gh/v1/tools/mitochondria:AlignAndCall/versions/12/plain-WDL/descriptor" as AlignAndCall workflow MitochondriaPipeline { meta { - description: "Takes in fully aligned hg38 bam and outputs VCF of SNP/Indel calls on the mitochondria." 
+ description: "Takes in an hg38 bam or cram and outputs VCF of SNP/Indel calls on the mitochondria." } input { @@ -45,20 +45,21 @@ workflow MitochondriaPipeline { File mt_shifted_bwt File mt_shifted_pac File mt_shifted_sa - File blacklisted_sites_shifted - File blacklisted_sites_shifted_index File shift_back_chain File control_region_shifted_reference_interval_list File non_control_region_interval_list + String? requester_pays_project File? gatk_override String? m2_extra_args String? m2_filter_extra_args Float? vaf_filter_threshold Float? f_score_beta + Float? verifyBamID Boolean compress_output_vcf = false + Int? max_low_het_sites #Optional runtime arguments Int? preemptible_tries @@ -82,6 +83,7 @@ workflow MitochondriaPipeline { ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, ref_dict = ref_dict, + requester_pays_project = requester_pays_project, gatk_override = gatk_override, preemptible_tries = preemptible_tries } @@ -115,16 +117,16 @@ workflow MitochondriaPipeline { mt_shifted_bwt = mt_shifted_bwt, mt_shifted_pac = mt_shifted_pac, mt_shifted_sa = mt_shifted_sa, - blacklisted_sites_shifted = blacklisted_sites_shifted, - blacklisted_sites_shifted_index = blacklisted_sites_shifted_index, shift_back_chain = shift_back_chain, gatk_override = gatk_override, m2_extra_args = m2_extra_args, m2_filter_extra_args = m2_filter_extra_args, vaf_filter_threshold = vaf_filter_threshold, f_score_beta = f_score_beta, + verifyBamID = verifyBamID, compress_output_vcf = compress_output_vcf, max_read_length = max_read_length, + max_low_het_sites = max_low_het_sites, preemptible_tries = preemptible_tries } @@ -146,6 +148,16 @@ workflow MitochondriaPipeline { shifted_ref_fasta_index = mt_shifted_fasta_index, shifted_ref_dict = mt_shifted_dict } + + call SplitMultiAllelicSites { + input: + input_vcf = AlignAndCall.out_vcf, + ref_fasta = mt_fasta, + ref_fasta_index = mt_fasta_index, + ref_dict = mt_dict, + gatk_override = gatk_override, + preemptible_tries = preemptible_tries 
+ } output { File subset_bam = SubsetBamToChrM.output_bam @@ -154,6 +166,9 @@ workflow MitochondriaPipeline { File mt_aligned_bai = AlignAndCall.mt_aligned_bai File out_vcf = AlignAndCall.out_vcf File out_vcf_index = AlignAndCall.out_vcf_index + File split_vcf = SplitMultiAllelicSites.split_vcf + File split_vcf_index = SplitMultiAllelicSites.split_vcf_index + File input_vcf_for_haplochecker = AlignAndCall.input_vcf_for_haplochecker File duplicate_metrics = AlignAndCall.duplicate_metrics File coverage_metrics = AlignAndCall.coverage_metrics File theoretical_sensitivity_metrics = AlignAndCall.theoretical_sensitivity_metrics @@ -171,6 +186,7 @@ task SubsetBamToChrM { File input_bai String contig_name String basename = basename(basename(input_bam, ".cram"), ".bam") + String? requester_pays_project File? ref_fasta File? ref_fasta_index File? ref_dict @@ -204,6 +220,7 @@ task SubsetBamToChrM { -L ~{contig_name} \ --read-filter MateOnSameContigOrNoMappedMateReadFilter \ --read-filter MateUnmappedAndUnmappedReadFilter \ + ~{"--gcs-project-for-requester-pays " + requester_pays_project} \ -I ~{input_bam} \ -O ~{basename}.bam >>> @@ -336,3 +353,36 @@ task CoverageAtEveryBase { File table = "per_base_coverage.tsv" } } + +task SplitMultiAllelicSites { + input { + File ref_fasta + File ref_fasta_index + File ref_dict + File input_vcf + Int? preemptible_tries + File? 
gatk_override + } + + command { + set -e + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override} + gatk LeftAlignAndTrimVariants \ + -R ~{ref_fasta} \ + -V ~{input_vcf} \ + -O split.vcf \ + --split-multi-allelics \ + --dont-trim-alleles \ + --keep-original-ac + } + output { + File split_vcf = "split.vcf" + File split_vcf_index = "split.vcf.idx" + } + runtime { + docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + memory: "3 MB" + disks: "local-disk 20 HDD" + preemptible: select_first([preemptible_tries, 5]) + } +} From 272f6b892f61456ca8cc83cd51820732eb6c0611 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 30 Mar 2020 14:29:32 -0400 Subject: [PATCH 70/85] cleanup --- ...ferenceConfidenceVariantContextMerger.java | 4 +-- .../AS_StrandBiasMutectAnnotation.java | 4 +++ .../allelespecific/StrandBiasUtils.java | 3 ++ .../walkers/filters/VariantFiltration.java | 8 ++--- .../mutect/filtering/AlleleFilterUtils.java | 31 ++++++++++++++++-- .../mutect/filtering/ContaminationFilter.java | 12 +++---- .../mutect/filtering/ErrorProbabilities.java | 6 ++-- .../mutect/filtering/HardAlleleFilter.java | 3 ++ .../mutect/filtering/Mutect2AlleleFilter.java | 32 +++++++++++++++++++ .../mutect/filtering/Mutect2Filter.java | 9 ++++-- .../filtering/Mutect2FilteringEngine.java | 9 ------ .../filtering/Mutect2VariantFilter.java | 7 ++++ .../mutect/filtering/NuMTFilterTool.java | 4 +-- .../variant/GATKVariantContextUtils.java | 3 -- .../mutect/Mutect2IntegrationTest.java | 13 -------- 15 files changed, 102 insertions(+), 46 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java index 0cdcb995856..760991af734 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java @@ -35,7 +35,7 @@ public final class ReferenceConfidenceVariantContextMerger { private static final GenotypeLikelihoodCalculators calculators = new GenotypeLikelihoodCalculators(); private static VCFHeader vcfInputHeader = null; protected final VariantAnnotatorEngine annotatorEngine; - protected final boolean doSomaticMerge; + private final boolean doSomaticMerge; protected boolean dropSomaticFilteringAnnotations; protected final OneShotLogger oneShotAnnotationLogger = new OneShotLogger(this.getClass()); protected final OneShotLogger oneShotHeaderLineLogger = new OneShotLogger(this.getClass()); @@ -656,7 +656,7 @@ private static int[] generatePL(final Genotype g, final int[] genotypeIndexMapBy * * @param originalList the array containing the pre-parsed, original TLOD(s) * @param indexesOfRelevantAlleles the indexes of the original alleles corresponding to the new alleles - * @return array of new annotation values, may be null + * @return a List of new annotation values, may be null */ @VisibleForTesting public static List generateAnnotationValueVector(VCFHeaderLineCount alleleCount, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java index 9fd00eac708..f21efd728af 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AS_StrandBiasMutectAnnotation.java @@ -19,8 +19,12 @@ import java.util.List; import java.util.Map; +/** + * Adds the strand bias table annotation for use in mutect filters + */ public class AS_StrandBiasMutectAnnotation extends InfoFieldAnnotation implements StandardMutectAnnotation, AlleleSpecificAnnotation { private final static Logger logger = 
LogManager.getLogger(StrandBiasBySample.class); + @Override public Map annotate(ReferenceContext ref, VariantContext vc, AlleleLikelihoods likelihoods) { Utils.nonNull(vc); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java index 90386d5e342..50889586587 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/StrandBiasUtils.java @@ -10,6 +10,9 @@ import java.util.*; import java.util.stream.Collectors; +/** + * Common strand bias utilities used by allele specific strand bias annotators + */ public class StrandBiasUtils { public static final int FORWARD = 0; public static final int REVERSE = 1; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java index 9983095f79a..761113cb595 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java @@ -344,11 +344,11 @@ public void onTraversalStart() { @Override public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) { if (applyForAllele) { - List filtered = splitMultiAllelics(variant).stream().map(vc -> filter(vc, new FeatureContext(featureContext, new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd())))).collect(Collectors.toList()); + final List filtered = splitMultiAllelics(variant).stream().map(vc -> filter(vc, new FeatureContext(featureContext, new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd())))).collect(Collectors.toList()); 
// get filters for each allele - List> alleleFilters = filtered.stream().map(filteredvc -> filteredvc.getFilters()).collect(Collectors.toList()); + final List> alleleFilters = filtered.stream().map(filteredvc -> filteredvc.getFilters()).collect(Collectors.toList()); // add in the AS_FilterStatus and set the variant filters - VariantContext filteredVC = AlleleFilterUtils.addAlleleAndComputeSiteFilters(variant, alleleFilters); + final VariantContext filteredVC = AlleleFilterUtils.addAlleleAndComputeSiteFilters(variant, alleleFilters); writer.add(filteredVC); } else { writer.add(filter(variant, featureContext)); @@ -356,7 +356,7 @@ public void apply(final VariantContext variant, final ReadsContext readsContext, } private List splitMultiAllelics(VariantContext vc) { - List results = new ArrayList<>(); + final List results = new ArrayList<>(); final VariantContextBuilder vcb = new VariantContextBuilder("SimpleSplit", vc.getContig(), vc.getStart(), vc.getEnd(), Arrays.asList(vc.getReference(), Allele.NO_CALL)); vc.getAlternateAlleles().forEach(allele -> results.add(GATKVariantContextUtils.trimAlleles( diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java index cb7814beda0..f028f29f3da 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java @@ -13,20 +13,40 @@ import java.util.stream.Collectors; /** - * Helps read and set allele specific filters + * Helps read and set allele specific filters in the INFO field. Also, helps with updating the Filter field */ public class AlleleFilterUtils { + /** + * Decode the AS_FilterStatus INFO attribute. It is important to trim the strings since the + * parser puts spaces in the coded string. 
SITE can be returned in the list (i.e. it is not + * removed during the processing). + * @param vc the variant context to read the AS_FilterStatus attribute from + * @return A list for each alt allele which contains a list of the filters that apply + */ public static List> decodeASFilters(VariantContext vc) { return AnnotationUtils.decodeAnyASListWithRawDelim(vc.getCommonInfo().getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, "")).stream() .map(filters -> AnnotationUtils.decodeAnyASList(filters).stream().map(String::trim).collect(Collectors.toList())) .collect(Collectors.toList()); } + /** + * Create the encoded string for AS_FilterStatus from the list of filters for each alt allele. + * The method assumes that SITE has been inserted for any empty filter lists + * @param filters a list for each alt allele that contains a list of the filters that apply to it + * @return the encoded string + */ public static String encodeASFilters(List> filters) { return AnnotationUtils.encodeAnyASListWithRawDelim(filters.stream().map(alleleFilters -> AnnotationUtils.encodeStringList(alleleFilters)).collect(Collectors.toList())); } + /** + * Takes a list of boolean values that indicate whether the filter applies to each of the alternate alleles + * @param vc variant context to use to get existing allele filters + * @param isFiltered list for alternate alleles of whether the specified filter should apply + * @param filterName the name of the filter to apply + * @return the new encoded string for the AS_FilterStatus INFO attribute + */ public static String getMergedASFilterString(VariantContext vc, List isFiltered, String filterName) { List> alleleFilters = decodeASFilters(vc); Utils.validateArg(isFiltered.size() == alleleFilters.size(), "lists are not the same size"); @@ -42,7 +62,14 @@ public static String getMergedASFilterString(VariantContext vc, List is return encodeASFilters(updatedFilters); } - public static List addFilter(List currentFilters, String newFilter) { + 
/** + * Adds the new filter to the list of current filters. Takes care of replacing the SITE keyword + * if there were no previous filters + * @param currentFilters the current list of filter for the allele + * @param newFilter the new filter to add + * @return the new list of filters + */ + protected static List addFilter(List currentFilters, String newFilter) { if (currentFilters.size() == 1 && currentFilters.contains(GATKVCFConstants.SITE_LEVEL_FILTERS)) { return Collections.singletonList(newFilter); } else { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java index e6a5c0f5a96..71456d5af5b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ContaminationFilter.java @@ -52,14 +52,14 @@ public List calculateErrorProbabilityForAlleles(final VariantContext vc, GATKVCFConstants.POPULATION_AF_KEY, () -> new double[]{Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY}, Double.POSITIVE_INFINITY); final double[] alleleFrequencies = MathUtils.applyToArray(negativeLog10AlleleFrequencies, x -> Math.pow(10,-x)); - SomaticClusteringModel model = filteringEngine.getSomaticClusteringModel(); + final SomaticClusteringModel model = filteringEngine.getSomaticClusteringModel(); final double[] logSomaticLikelihoodPerAllele = Arrays.stream(altADs).mapToDouble(altCount -> model.logLikelihoodGivenSomatic(totalAD, altCount)).toArray(); - double[] singleContaminantLikelihoodPerAllele = new double[alleleFrequencies.length]; - double[] manyContaminantLikelihoodPerAllele = new double[alleleFrequencies.length]; - double[] logContaminantLikelihoodPerAllele = new double[alleleFrequencies.length]; - double[] logOddsOfRealVsContaminationPerAllele = new double[alleleFrequencies.length]; - 
double[] posteriorProbOfContaminationPerAllele = new double[alleleFrequencies.length]; + final double[] singleContaminantLikelihoodPerAllele = new double[alleleFrequencies.length]; + final double[] manyContaminantLikelihoodPerAllele = new double[alleleFrequencies.length]; + final double[] logContaminantLikelihoodPerAllele = new double[alleleFrequencies.length]; + final double[] logOddsOfRealVsContaminationPerAllele = new double[alleleFrequencies.length]; + final double[] posteriorProbOfContaminationPerAllele = new double[alleleFrequencies.length]; new IndexRange(0,alleleFrequencies.length).forEach(i -> { singleContaminantLikelihoodPerAllele[i] = 2 * alleleFrequencies[i] * (1 - alleleFrequencies[i]) * MathUtils.binomialProbability(totalAD, altADs[i], contamination /2) + MathUtils.square(alleleFrequencies[i]) * MathUtils.binomialProbability(totalAD, altADs[i], contamination); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java index e06526a21be..c9f73dfd77c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/ErrorProbabilities.java @@ -32,7 +32,7 @@ public ErrorProbabilities(final List filters, final VariantContex // if vc has symbolic alleles, remove them from each filter list alleleProbabilitiesByFilter.replaceAll((k, v) -> GATKVariantContextUtils.removeDataForSymbolicAltAlleles(vc, v)); - LinkedHashMap>> probabilitiesByAllelesForEachFilter = alleleProbabilitiesByFilter.entrySet().stream().collect( + final LinkedHashMap>> probabilitiesByAllelesForEachFilter = alleleProbabilitiesByFilter.entrySet().stream().collect( groupingBy(entry -> entry.getKey().errorType(), LinkedHashMap::new, mapping(entry -> entry.getValue(), toList()))); // convert the data so we have a list of 
probabilities by allele instead of filter probabilitiesByAllelesForEachFilter.replaceAll((k, v) -> ErrorProbabilities.transpose(v)); @@ -71,7 +71,7 @@ public LinkedHashMap getProbabilitiesForVariantFilters() } private LinkedHashMap> getPartitionedProbabilitiesByFilter(boolean variantOnly) { - Map>> groups = + final Map>> groups = alleleProbabilitiesByFilter.entrySet().stream().collect(Collectors.partitioningBy( entry -> Mutect2VariantFilter.class.isAssignableFrom(entry.getKey().getClass()), toMap(Map.Entry::getKey, Map.Entry::getValue, (a,b) -> a, LinkedHashMap::new))); @@ -84,7 +84,7 @@ public static List> transpose(List> list) { return list; } Utils.validateArg(list.stream().map(List::size).distinct().count() == 1, "lists are not the same size"); - List> iterList = list.stream().map(it -> it.iterator()).collect(toList()); + final List> iterList = list.stream().map(it -> it.iterator()).collect(toList()); return IntStream.range(0, list.get(0).size()) .mapToObj(n -> iterList.stream() .filter(it -> it.hasNext()) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java index 9178aa2ee15..7aec9dc023b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/HardAlleleFilter.java @@ -7,6 +7,9 @@ import java.util.*; import java.util.stream.Collectors; +/** + * Base class for Hard filters that are applied at the allele level + */ public abstract class HardAlleleFilter extends Mutect2AlleleFilter { @Override public List calculateErrorProbabilityForAlleles(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index 6c152404153..1a36376b246 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -10,20 +10,52 @@ import java.util.function.Predicate; import java.util.stream.Collectors; +/** + * Base class for filters that apply at the allele level. This includes helper functions that + * convert many lists of data for with data for each allele, to lists of data grouped together by the allele + */ public abstract class Mutect2AlleleFilter extends Mutect2Filter { + /** + * The call to use when the data includes data for the reference + * @param vc the variant context to get the genotypes from + * @param preconditions a predicate that must pass for data to be returned + * @param getData a function that returns the list of data for a genotype. 
the size must match the number of allele in the dataByAllele map + * @param filteringEngine + * @param the type of the data returned + * @return a map the list of values for each allele + */ public static LinkedHashMap> getDataByAllele(final VariantContext vc, Predicate preconditions, Function> getData, final Mutect2FilteringEngine filteringEngine) { // create and initialize a map with all the alleles in the vc as keys and new, empty lists as values LinkedHashMap> dataByAllele = vc.getAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> new ArrayList<>(), (a, b) -> a, () -> new LinkedHashMap<>())); return combineDataByAllele(dataByAllele, vc, preconditions, getData, filteringEngine); } + /** + * The call to use when the data is only for the alternate alleles + * @param vc the variant context to get the genotypes from + * @param preconditions a predicate that must pass for data to be returned + * @param getAltData a function that returns the list of data for a genotype. 
the size must match the number of allele in the dataByAllele map + * @param filteringEngine + * @param the type of the data returned + * @return a map with the list of values for each allele + */ public static LinkedHashMap> getAltDataByAllele(final VariantContext vc, Predicate preconditions, Function> getAltData, final Mutect2FilteringEngine filteringEngine) { // create and initialize a map with all the alleles in the vc as keys and new, empty lists as values LinkedHashMap> dataByAllele = vc.getAlternateAlleles().stream().collect(Collectors.toMap(Function.identity(), allele -> new ArrayList<>(), (a, b) -> a, () -> new LinkedHashMap<>())); return combineDataByAllele(dataByAllele, vc, preconditions, getAltData, filteringEngine); } + /** + * Helper function that combines data from multiple genotypes to a list of data for each allele + * @param dataByAllele the map to return with the list of values separated by allele + * @param vc the variant context to get the genotypes from + * @param preconditions a predicate that must pass for data to be returned + * @param getData a function that returns the list of data for a genotype. 
the size must match the number of allele in the dataByAllele map + * @param filteringEngine + * @param the type of the data returned + * @return the dataByAllele map filled in with the list of values for each allele + */ private static LinkedHashMap> combineDataByAllele(final LinkedHashMap> dataByAllele, final VariantContext vc, Predicate preconditions, Function> getData, final Mutect2FilteringEngine filteringEngine) { // pull all the allele specific data out of each genotype (that passes preconditions) and add it to the list for the allele diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java index b12e3a010d1..d13a3be4b70 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2Filter.java @@ -8,6 +8,9 @@ import java.util.List; import java.util.Optional; +/** + * Base class for all Mutect2Filters + */ public abstract class Mutect2Filter { // by default do nothing, but we may override to allow some filters to learn their parameters in the first pass of {@link FilterMutectCalls} protected void accumulateDataForLearning(final VariantContext vc, final ErrorProbabilities errorProbabilities, final Mutect2FilteringEngine filteringEngine) { } @@ -24,7 +27,9 @@ protected void learnParametersAndClearAccumulatedData() { protected abstract List requiredInfoAnnotations(); /** - * + * Should be overridden by the implementing class to return the probability that the allele should + * filtered out. 
For filters that only apply at the site level, the same probability should be + * returned for every alt allele * @param vc * @param filteringEngine * @param referenceContext @@ -32,7 +37,7 @@ protected void learnParametersAndClearAccumulatedData() { */ public abstract List errorProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext); - // weighted median -- what's the lowest posterior probability that accounts for samples with half of the total alt depth + // weighted median -- what's the lowest posterior probability that accounts for samples with half of the total alt depth protected static double weightedMedianPosteriorProbability(List> depthsAndPosteriors) { final int totalAltDepth = depthsAndPosteriors.stream().mapToInt(ImmutablePair::getLeft).sum(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java index 8d62c7df907..3742349af8e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2FilteringEngine.java @@ -102,15 +102,6 @@ public int[] sumADsOverSamples(final VariantContext vc, final boolean includeTum return ADs; } - // TODO verify not needed and remove -// public int[] sumStrandCountsOverSamples(final VariantContext vc, final boolean includeTumor, final boolean includeNormal) { -// final int[] result = new int[4]; -// vc.getGenotypes().stream().filter(g -> (includeTumor && isTumor(g)) || (includeNormal && isNormal(g))) -// .filter(g -> g.hasExtendedAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) -// .map(g -> StrandBiasTest.getStrandCounts(g)).forEach(sbbs -> new IndexRange(0, 4).forEach(n -> result[n] += sbbs[n])); -// return result; -// } - public double[] 
weightedAverageOfTumorAFs(final VariantContext vc) { final MutableDouble totalWeight = new MutableDouble(0); final double[] AFs = new double[vc.getNAlleles() - 1]; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java index 985df7f4cee..78b80da489e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2VariantFilter.java @@ -9,6 +9,13 @@ public abstract class Mutect2VariantFilter extends Mutect2Filter { public Mutect2VariantFilter() { } + /** + * Converts the single probability calculated for the site to be the probability for each allele + * @param vc + * @param filteringEngine + * @param referenceContext + * @return + */ @Override public List errorProbabilities(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) { int numAltAlleles = vc.getNAlleles() - 1; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilterTool.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilterTool.java index 1d43a606e28..7abb5837fe0 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilterTool.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/NuMTFilterTool.java @@ -23,8 +23,8 @@ import java.util.stream.Collectors; @CommandLineProgramProperties( - summary = "", - oneLineSummary = "", + summary = "Uses the median autosomal coverage and the allele depth to determine whether the allele might be a NuMT", + oneLineSummary = "Uses the median autosomal coverage and the allele depth to determine whether the allele might be a NuMT", programGroup = VariantFilteringProgramGroup.class ) 
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index 6be0eafd26f..10383d176a6 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -1547,9 +1547,6 @@ public static List splitVariantContextToBiallelics(final Variant } } - //TODO: split allele-specific filters (which are comma-delimited, as applied by VQSR) - - // subset INFO field annotations if available if genotype is called if (genotypeAssignmentMethodUsed != GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS && genotypeAssignmentMethodUsed != GenotypeAssignmentMethod.SET_TO_NO_CALL) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 91530b33e77..283130d55eb 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -604,19 +604,6 @@ public Object[][] vcfsForNuMTFiltering() { Arrays.asList(GATKVCFConstants.TUMOR_EVIDENCE_FILTER_NAME + ", " + GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME + ", " + GATKVCFConstants.STRICT_STRAND_BIAS_FILTER_NAME, GATKVCFConstants.ALLELE_FRACTION_FILTER_NAME + ", " + GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME) // weak_evidence, strand_bias, strict_stand|low_allele_frac, possible_numt )} -// {NA12878_MITO_GVCF, 0.5, Arrays.asList("MT:1", "MT:37", "MT:40", "MT:152", "MT:157"), Arrays.asList( -// Collections.emptySet(), -// Collections.emptySet(), -// Collections.emptySet(), -// Collections.emptySet(), -// ImmutableSet.of(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME)), -// Arrays.asList( -// 
Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS), //".", -// Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS, GATKVCFConstants.SITE_LEVEL_FILTERS), //"weak_evidence, base_qual, strand_bias|.", -// Arrays.asList(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.SITE_LEVEL_FILTERS), // "possible_numt|.", -// Arrays.asList(GATKVCFConstants.SITE_LEVEL_FILTERS, GATKVCFConstants.SITE_LEVEL_FILTERS, GATKVCFConstants.SITE_LEVEL_FILTERS), //".|weak_evidence, base_qual, strand_bias, low_allele_frac|.", -// Arrays.asList(GATKVCFConstants.POSSIBLE_NUMT_FILTER_NAME, GATKVCFConstants.SITE_LEVEL_FILTERS) // "possible_numt|." -// )} }; } From 35ab2849b737c1353a53f1dfb11928359832b8bb Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 31 Mar 2020 10:45:57 -0400 Subject: [PATCH 71/85] updating wdl --- scripts/mitochondria_m2_wdl/AlignAndCall.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl index bee56955879..84b2221eb96 100644 --- a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl +++ b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl @@ -260,8 +260,9 @@ workflow AlignAndCall { task GetContamination { input { File input_vcf - # runtime - Int? preemptible_tries} + # runtime + Int? 
preemptible_tries + } Int disk_size = ceil(size(input_vcf, "GB")) + 20 From 0dfab1da9c821ca70dc6b5c1f17a58d131c234c9 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Wed, 1 Apr 2020 13:58:47 -0400 Subject: [PATCH 72/85] fix bug that removes existing filters --- .../walkers/filters/VariantFiltration.java | 2 +- .../mutect/filtering/AlleleFilterUtils.java | 71 ++++++++++++------- .../VariantFiltrationIntegrationTest.java | 13 ++++ ...tVariantFiltration_testMaskWithFilters.vcf | 69 ++++++++++++++++++ .../filters/VariantFiltration/filtered.vcf | 68 ++++++++++++++++++ 5 files changed, 196 insertions(+), 27 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/filtered.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java index 761113cb595..d7f8f9520a9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java @@ -348,7 +348,7 @@ public void apply(final VariantContext variant, final ReadsContext readsContext, // get filters for each allele final List> alleleFilters = filtered.stream().map(filteredvc -> filteredvc.getFilters()).collect(Collectors.toList()); // add in the AS_FilterStatus and set the variant filters - final VariantContext filteredVC = AlleleFilterUtils.addAlleleAndComputeSiteFilters(variant, alleleFilters); + final VariantContext filteredVC = AlleleFilterUtils.addAlleleAndSiteFilters(variant, alleleFilters, invalidatePreviousFilters); writer.add(filteredVC); } else { writer.add(filter(variant, featureContext)); diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java index f028f29f3da..3d55b57abfc 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java @@ -2,12 +2,9 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; -import htsjdk.variant.vcf.VCFConstants; -import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation; import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; -import shaded.cloud_nio.com.google.errorprone.annotations.Var; import java.util.*; import java.util.stream.Collectors; @@ -55,7 +52,7 @@ public static String getMergedASFilterString(VariantContext vc, List is List> updatedFilters = alleleFilters.stream().map(filters -> { Boolean filtered = isFilteredIt.next(); if (filtered) { - return addFilter(filters, filterName); + return addAlleleFilters(filters, Collections.singletonList(filterName)); } else return filters; }).collect(Collectors.toList()); @@ -63,42 +60,64 @@ public static String getMergedASFilterString(VariantContext vc, List is } /** - * Adds the new filter to the list of current filters. Takes care of replacing the SITE keyword + * Adds the new filters to the list of current filters. 
Takes care of replacing the SITE keyword * if there were no previous filters - * @param currentFilters the current list of filter for the allele - * @param newFilter the new filter to add - * @return the new list of filters + * @param currentAlleleFilters the current list of filter for the allele + * @param newFilters the new filters to add + * @return the updated list of filters */ - protected static List addFilter(List currentFilters, String newFilter) { - if (currentFilters.size() == 1 && currentFilters.contains(GATKVCFConstants.SITE_LEVEL_FILTERS)) { - return Collections.singletonList(newFilter); + protected static List addAlleleFilters(List currentAlleleFilters, List newFilters) { + if (newFilters.isEmpty()) { + return currentAlleleFilters; + } else if (currentAlleleFilters.isEmpty() || (currentAlleleFilters.size() == 1 && currentAlleleFilters.contains(GATKVCFConstants.SITE_LEVEL_FILTERS))) { + // new filters is not empty and there are no filters currently set for the allele + return newFilters; } else { - List updated = new ArrayList<>(); - updated.addAll(currentFilters); - updated.add(newFilter); - return updated; + LinkedHashSet updated = new LinkedHashSet<>(); + updated.addAll(currentAlleleFilters); + updated.addAll(newFilters); + return updated.stream().collect(Collectors.toList()); } + } /** + * Adds the new allele filters to the existing allele filters in the vc. Computes whether there are + * new site filters and updates the filter in the vc. If there are no site filters, sets filters to pass * Sets the filters for each allele and calculates the intersection of the allele filters to set on the variant. * PASS if the intersection is empty. 
- * @param vc The variant context to build from, however it assumes all relevant filters are set in the alleleFilters collection - * @param alleleFilters filters to be applied to each allele, the intersection of these filters are applied at the site level + * @param vc The variant context to add the filters to, both at the allele and site level + * @param newAlleleFilters filters to be applied to each allele, the intersection of these filters are applied at the site level + * @param invalidatePreviousFilters whether existing filters should be removed * @return The updated variant context */ - public static VariantContext addAlleleAndComputeSiteFilters(VariantContext vc, List> alleleFilters) { - String encodedFilters = AlleleFilterUtils.encodeASFilters(alleleFilters.stream().map( - af -> af.isEmpty() ? Collections.singletonList(GATKVCFConstants.SITE_LEVEL_FILTERS) : af.stream().collect(Collectors.toList())).collect(Collectors.toList())); - VariantContextBuilder vcb = new VariantContextBuilder(vc).attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, encodedFilters); + public static VariantContext addAlleleAndSiteFilters(VariantContext vc, List> newAlleleFilters, boolean invalidatePreviousFilters) { + if (newAlleleFilters.isEmpty()) { + return vc; + } + List> currentAlleleFilters = decodeASFilters(vc); + if (!currentAlleleFilters.isEmpty() && newAlleleFilters.size() != currentAlleleFilters.size()) { + // log an error + return vc; + } - Set siteFilters = alleleFilters.stream().skip(1) - .collect(()->new HashSet<>(alleleFilters.get(0)), Set::retainAll, Set::retainAll); + if (currentAlleleFilters.isEmpty() || invalidatePreviousFilters) { + currentAlleleFilters = new ArrayList<>(Collections.nCopies(newAlleleFilters.size(), Collections.singletonList(GATKVCFConstants.SITE_LEVEL_FILTERS))); + } + ListIterator> currentAlleleFiltersIt = currentAlleleFilters.listIterator(); + List> updatedAlleleFilters = newAlleleFilters.stream().map(newfilters -> 
addAlleleFilters(currentAlleleFiltersIt.next(), newfilters.stream().collect(Collectors.toList()))).collect(Collectors.toList()); + String encodedFilters = encodeASFilters(updatedAlleleFilters); + VariantContextBuilder vcb = new VariantContextBuilder(vc).attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, encodedFilters); - if (!siteFilters.isEmpty()) { - vcb.filters(siteFilters); - } else { + if (invalidatePreviousFilters) { + vcb.unfiltered(); + } + Set siteFilters = newAlleleFilters.stream().skip(1) + .collect(()->new HashSet<>(newAlleleFilters.get(0)), Set::retainAll, Set::retainAll); + if (siteFilters.isEmpty() && !invalidatePreviousFilters) { vcb.passFilters(); + } else { + siteFilters.stream().forEach(filter -> vcb.filter(filter)); } return vcb.make(); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java index fd391d450f8..805f454d24d 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java @@ -62,6 +62,19 @@ public void testMask(final String maskName, final String mask, final String expe spec.executeTest("testMask", this); } + @Test + public void testMaskWithFilters() throws IOException { + final String maskName = "blacklisted_site"; + final String mask = "--apply-allele-specific-filters --mask " + new File(getToolTestDataDir() + "blacklistedMask.bed").getAbsolutePath(); + final String expected = "testVariantFiltration_testMaskWithFilters.vcf"; + final IntegrationTestSpec spec = new IntegrationTestSpec( + baseTestString("filtered.vcf", " -mask-name " + maskName + " " + mask), + Arrays.asList(getToolTestDataDir() + "expected/" + expected) + ); + + spec.executeTest("testMask", this); + } + @Test public void testMaskReversed() 
throws IOException { final IntegrationTestSpec spec = new IntegrationTestSpec( diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters.vcf new file mode 100644 index 00000000000..f78dacb604c --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters.vcf @@ -0,0 +1,69 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##Mutect Version=2.1 +##contig= +##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. +##source=FilterMutectCalls +##source=Mutect2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +1 152 . T C . PASS AS_FilterStatus=SITE;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +1 263 . A G . PASS AS_FilterStatus=SITE;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +1 301 . A AC . 
blacklisted_site;low_allele_frac;possible_numt;weak_evidence AS_FilterStatus=weak_evidence,low_allele_frac,possible_numt,blacklisted_site;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +1 302 . A AC,C,ACC . blacklisted_site AS_FilterStatus=blacklisted_site|weak_evidence,low_allele_frac,possible_numt,blacklisted_site|low_allele_frac,possible_numt,blacklisted_site;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +1 310 . T TC . blacklisted_site AS_FilterStatus=blacklisted_site;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +1 750 . A G . PASS AS_FilterStatus=SITE;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/filtered.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/filtered.vcf new file mode 100644 index 00000000000..1987cd3810f --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/filtered.vcf @@ -0,0 +1,68 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= 
+##FORMAT= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##Mutect Version=2.1 +##contig= +##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. +##source=FilterMutectCalls +##source=Mutect2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +1 152 . T C . PASS AS_FilterStatus=SITE;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +1 263 . A G . PASS AS_FilterStatus=SITE;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +1 301 . A AC . low_allele_frac;possible_numt;weak_evidence AS_FilterStatus=weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +1 302 . A AC,C,ACC . PASS AS_FilterStatus=SITE|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +1 310 . T TC . PASS AS_FilterStatus=SITE;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +1 750 . A G . 
PASS AS_FilterStatus=SITE;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true From 9b8f2118dca98f17e09c856b395b7703b14a9e33 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Wed, 1 Apr 2020 14:10:23 -0400 Subject: [PATCH 73/85] add one more test --- .../VariantFiltrationIntegrationTest.java | 16 +++-- ...ariantFiltration_testMaskWithFilters1.vcf} | 0 ...VariantFiltration_testMaskWithFilters2.vcf | 69 +++++++++++++++++++ 3 files changed, 80 insertions(+), 5 deletions(-) rename src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/{testVariantFiltration_testMaskWithFilters.vcf => testVariantFiltration_testMaskWithFilters1.vcf} (100%) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters2.vcf diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java index 805f454d24d..c6e8851a096 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltrationIntegrationTest.java @@ -62,11 +62,17 @@ public void testMask(final String maskName, final String mask, final String expe spec.executeTest("testMask", this); } - @Test - public void testMaskWithFilters() throws IOException { - final String maskName = "blacklisted_site"; - final String mask = "--apply-allele-specific-filters --mask " + new File(getToolTestDataDir() + "blacklistedMask.bed").getAbsolutePath(); - final String expected = "testVariantFiltration_testMaskWithFilters.vcf"; + @DataProvider(name="masksWithFilters") + public Object[][] 
masksWithFilters() { + return new String[][]{ + {"blacklisted_site", "--apply-allele-specific-filters --mask " + new File(getToolTestDataDir() + "blacklistedMask.bed").getAbsolutePath(), "testVariantFiltration_testMaskWithFilters1.vcf"}, + {"blacklisted_site", "--invalidate-previous-filters --apply-allele-specific-filters --mask " + new File(getToolTestDataDir() + "blacklistedMask.bed").getAbsolutePath(), "testVariantFiltration_testMaskWithFilters2.vcf"} + }; + } + + + @Test(dataProvider = "masksWithFilters") + public void testMaskWithFilters(final String maskName, final String mask, final String expected) throws IOException { final IntegrationTestSpec spec = new IntegrationTestSpec( baseTestString("filtered.vcf", " -mask-name " + maskName + " " + mask), Arrays.asList(getToolTestDataDir() + "expected/" + expected) diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters1.vcf similarity index 100% rename from src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters.vcf rename to src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters1.vcf diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters2.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters2.vcf new file mode 100644 index 00000000000..028c4572bb4 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters2.vcf @@ -0,0 +1,69 
@@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##Mutect Version=2.1 +##contig= +##filtering_status=These calls have been filtered by FilterMutectCalls to label false positives with a list of failed filters and true positives with PASS. +##source=FilterMutectCalls +##source=Mutect2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +1 152 . T C . . AS_FilterStatus=SITE;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true +1 263 . A G . . AS_FilterStatus=SITE;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +1 301 . A AC . blacklisted_site AS_FilterStatus=blacklisted_site;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 +1 302 . A AC,C,ACC . blacklisted_site AS_FilterStatus=blacklisted_site|blacklisted_site|blacklisted_site;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 +1 310 . T TC . 
blacklisted_site AS_FilterStatus=blacklisted_site;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 +1 750 . A G . . AS_FilterStatus=SITE;DP=1568;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5097.90 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60,60:40:true From 3b43c103c4366214fd4c2a2125f985485e75f82d Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Wed, 1 Apr 2020 17:19:30 -0400 Subject: [PATCH 74/85] fix overwrite of filter --- .../tools/walkers/mutect/filtering/AlleleFilterUtils.java | 7 +++---- .../testVariantFiltration_testMaskWithFilters1.vcf | 2 +- .../tools/walkers/filters/VariantFiltration/filtered.vcf | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java index 3d55b57abfc..2f64657a6f4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/AlleleFilterUtils.java @@ -112,12 +112,11 @@ public static VariantContext addAlleleAndSiteFilters(VariantContext vc, List siteFilters = newAlleleFilters.stream().skip(1) + Set siteFiltersToAdd = newAlleleFilters.stream().skip(1) .collect(()->new HashSet<>(newAlleleFilters.get(0)), Set::retainAll, Set::retainAll); - if (siteFilters.isEmpty() && !invalidatePreviousFilters) { + siteFiltersToAdd.stream().forEach(filter -> vcb.filter(filter)); + if ((vcb.getFilters() == null || vcb.getFilters().isEmpty()) && !invalidatePreviousFilters) { vcb.passFilters(); - } else { - siteFilters.stream().forEach(filter -> vcb.filter(filter)); } return vcb.make(); } diff --git 
a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters1.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters1.vcf index f78dacb604c..3a723c58ff4 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters1.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/expected/testVariantFiltration_testMaskWithFilters1.vcf @@ -62,7 +62,7 @@ ##source=Mutect2 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 1 152 . T C . PASS AS_FilterStatus=SITE;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true -1 263 . A G . PASS AS_FilterStatus=SITE;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +1 263 . A G . weak_evidence AS_FilterStatus=weak_evidence;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 1 301 . A AC . blacklisted_site;low_allele_frac;possible_numt;weak_evidence AS_FilterStatus=weak_evidence,low_allele_frac,possible_numt,blacklisted_site;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 1 302 . A AC,C,ACC . 
blacklisted_site AS_FilterStatus=blacklisted_site|weak_evidence,low_allele_frac,possible_numt,blacklisted_site|low_allele_frac,possible_numt,blacklisted_site;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 1 310 . T TC . blacklisted_site AS_FilterStatus=blacklisted_site;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/filtered.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/filtered.vcf index 1987cd3810f..0ba674d1f58 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/filtered.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/filtered.vcf @@ -61,7 +61,7 @@ ##source=Mutect2 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 1 152 . T C . PASS AS_FilterStatus=SITE;DP=1582;ECNT=1;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=5266.19 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60,60:42:true -1 263 . A G . PASS AS_FilterStatus=SITE;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 +1 263 . A G . weak_evidence AS_FilterStatus=weak_evidence;DP=858;ECNT=4;OCM=800;POPAF=5.000e-08;STRANDQ=93;TLOD=2641.72 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60,60:32 1 301 . A AC . 
low_allele_frac;possible_numt;weak_evidence AS_FilterStatus=weak_evidence,low_allele_frac,possible_numt;DP=680;ECNT=4;OCM=0;POPAF=5.000e-08;STRANDQ=93;TLOD=3.32 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60,60:34 1 302 . A AC,C,ACC . PASS AS_FilterStatus=SITE|weak_evidence,low_allele_frac,possible_numt|low_allele_frac,possible_numt;DP=659;ECNT=4;OCM=0;POPAF=5.000e-08,5.000e-08,5.000e-08;STRANDQ=93;TLOD=891.23,10.66,67.66 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60,60:41,33,38 1 310 . T TC . PASS AS_FilterStatus=SITE;DP=705;ECNT=4;OCM=0;POPAF=5.000e-08;RPA=5,6;RU=C;STR;STRANDQ=93;TLOD=1974.89 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60,60:33 From 8191f3c3f24f6148459295e5597796f26b411f07 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 2 Apr 2020 14:14:45 -0400 Subject: [PATCH 75/85] add missing test files --- .../filters/VariantFiltration/blacklistedMask.bed | 6 ++++++ .../VariantFiltration/blacklistedMask.bed.idx | Bin 0 -> 322 bytes .../filters/VariantFiltration/filtered.vcf.idx | Bin 0 -> 307 bytes 3 files changed, 6 insertions(+) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/blacklistedMask.bed create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/blacklistedMask.bed.idx create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/filtered.vcf.idx diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/blacklistedMask.bed b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/blacklistedMask.bed new file mode 100644 index 00000000000..e99fc5c332d --- /dev/null +++ 
b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/blacklistedMask.bed @@ -0,0 +1,6 @@ +1 300 301 . 500 + +1 301 302 . 500 + +1 309 310 . 500 + +1 315 316 . 500 + +1 3106 3107 . 500 + +1 16181 16182 . 500 + diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/blacklistedMask.bed.idx b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration/blacklistedMask.bed.idx new file mode 100644 index 0000000000000000000000000000000000000000..9e201e930cf457fb41bb0daa951034d7b8e3dc6f GIT binary patch literal 322 zcmZ8bO-sW-5FJ18pLp%G2R(QYLbLq<8=)q_gAg{^X}e^};?5ND=8yIVxGV0(2akF4 z{pxM`dI11ekq7JO0TCOfz^G4}nB6B1-Vh$OjL2a?Ar=h8{u~Bk^x=qo@Y>jk#mZR- zPvl%rlc9j(y<_~)ZcG>BeMzy^!Rkp?F%4R*pRjj&7@cJyv(r4zdNL5AqlC@v_uYH4 zm~b7rQbpbDRrjQ-b-n3!s;FQwoByxB)MZzy7Ur>)X1cuB7W-{cE!A%)7njdXRl_}` VM+^9R1h|YUzrBe%Q_N!FS+H^um)~ zoV@4Q&o0Zi6970>ZhWA7M68(-quV)R4qw>Co^b1AkDNLbV!=cl52+)@n6|i1(e-{{ z@p2Ht9fh!_p{IlrV_^JrVV@`Je95(O$-6-o`b Date: Fri, 3 Apr 2020 12:48:45 -0400 Subject: [PATCH 76/85] should fix wdl tests --- scripts/mitochondria_m2_wdl/AlignAndCall.wdl | 8 +++----- .../ExampleInputsMitochondriaPipeline.json | 2 -- scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl | 4 +++- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl index 84b2221eb96..abe09aa597c 100644 --- a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl +++ b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl @@ -1,6 +1,8 @@ version 1.0 -import "https://api.firecloud.org/ga4gh/v1/tools/mitochondria:AlignmentPipeline/versions/1/plain-WDL/descriptor" as AlignAndMarkDuplicates +import "AlignmentPipeline" as AlignAndMarkDuplicates + +#import "https://api.firecloud.org/ga4gh/v1/tools/mitochondria:AlignmentPipeline/versions/1/plain-WDL/descriptor" as AlignAndMarkDuplicates workflow AlignAndCall { meta { @@ -424,8 +426,6 @@ 
task M2 { String? m2_extra_args Boolean? make_bamout Boolean compress - File? gga_vcf - File? gga_vcf_idx File? gatk_override # runtime Int? mem @@ -446,7 +446,6 @@ task M2 { } parameter_meta { input_bam: "Aligned Bam" - gga_vcf: "VCF for genotype given alleles mode" } command <<< set -e @@ -459,7 +458,6 @@ task M2 { gatk --java-options "-Xmx~{command_mem}m" Mutect2 \ -R ~{ref_fasta} \ -I ~{input_bam} \ - ~{"--genotyping-mode GENOTYPE_GIVEN_ALLELES --alleles " + gga_vcf} \ --read-filter MateOnSameContigOrNoMappedMateReadFilter \ --read-filter MateUnmappedAndUnmappedReadFilter \ -O ~{output_vcf} \ diff --git a/scripts/mitochondria_m2_wdl/ExampleInputsMitochondriaPipeline.json b/scripts/mitochondria_m2_wdl/ExampleInputsMitochondriaPipeline.json index cbe3bfc0b5d..8fe8d088054 100644 --- a/scripts/mitochondria_m2_wdl/ExampleInputsMitochondriaPipeline.json +++ b/scripts/mitochondria_m2_wdl/ExampleInputsMitochondriaPipeline.json @@ -21,8 +21,6 @@ "MitochondriaPipeline.mt_shifted_bwt": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.bwt", "MitochondriaPipeline.mt_shifted_pac": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.pac", "MitochondriaPipeline.mt_shifted_sa": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.sa", - "MitochondriaPipeline.blacklisted_sites_shifted": "gs://broad-references/hg38/v0/chrM/blacklist_sites.hg38.chrM.shifted_by_8000_bases.fixed.bed", - "MitochondriaPipeline.blacklisted_sites_shifted_index": "gs://broad-references/hg38/v0/chrM/blacklist_sites.hg38.chrM.shifted_by_8000_bases.fixed.bed.idx", "MitochondriaPipeline.shift_back_chain": "gs://broad-references/hg38/v0/chrM/ShiftBack.chain", "MitochondriaPipeline.control_region_shifted_reference_interval_list": "gs://broad-references/hg38/v0/chrM/control_region_shifted.chrM.interval_list", "MitochondriaPipeline.non_control_region_interval_list": 
"gs://broad-references/hg38/v0/chrM/non_control_region.chrM.interval_list" diff --git a/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl b/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl index 24ada3a72aa..6124bcc2c06 100644 --- a/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl +++ b/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl @@ -1,6 +1,8 @@ version 1.0 -import "https://api.firecloud.org/ga4gh/v1/tools/mitochondria:AlignAndCall/versions/12/plain-WDL/descriptor" as AlignAndCall +import "AlignAndCall.wdl" as AlignAndCall + +#import "https://api.firecloud.org/ga4gh/v1/tools/mitochondria:AlignAndCall/versions/12/plain-WDL/descriptor" as AlignAndCall workflow MitochondriaPipeline { From 37c8c81e6284f6ccbf3495892ac4be47d712d8d3 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 3 Apr 2020 14:26:55 -0400 Subject: [PATCH 77/85] fix wdl tests --- scripts/m2_cromwell_tests/run_m2_wdl.sh | 5 +++- .../test_mitochondria_m2_wdl.json | 1 + scripts/mitochondria_m2_wdl/AlignAndCall.wdl | 29 ++++++++++++++----- .../MitochondriaPipeline.wdl | 10 +++++-- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/scripts/m2_cromwell_tests/run_m2_wdl.sh b/scripts/m2_cromwell_tests/run_m2_wdl.sh index 47f7879021d..4e5d1ec799e 100644 --- a/scripts/m2_cromwell_tests/run_m2_wdl.sh +++ b/scripts/m2_cromwell_tests/run_m2_wdl.sh @@ -36,6 +36,9 @@ cd $WORKING_DIR/gatk/scripts/ sed -r "s/__GATK_DOCKER__/broadinstitute\/gatk\:$HASH_TO_USE/g" m2_cromwell_tests/test_m2_wdl_multi.json >$WORKING_DIR/test_m2_wdl_multi_mod.json echo "JSON FILE (modified) =======" cat $WORKING_DIR/test_m2_wdl_multi_mod.json +sed -r "s/__GATK_DOCKER__/broadinstitute\/gatk\:$HASH_TO_USE/g" m2_cromwell_tests/test_mitochondria_m2_wdl.json >$WORKING_DIR/test_mitochondria_m2_wdl_mod.json +echo "JSON FILE (modified) =======" +cat $WORKING_DIR/test_mitochondria_m2_wdl_mod.json echo "==================" # Create the tumor-only json by using the pair_list_tumor_only file @@ -52,4 +55,4 @@ sudo 
java -jar $CROMWELL_JAR run $WORKING_DIR/gatk/scripts/mutect2_wdl/mutect2_m echo "Running Mitochondria M2 WDL through cromwell" ln -fs $WORKING_DIR/gatk/scripts/mitochondria_m2_wdl/AlignAndCall.wdl ln -fs $WORKING_DIR/gatk/scripts/mitochondria_m2_wdl/AlignmentPipeline.wdl -sudo java -jar $CROMWELL_JAR run $WORKING_DIR/gatk/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl -i $WORKING_DIR/gatk/scripts/m2_cromwell_tests/test_mitochondria_m2_wdl.json -m $WORKING_DIR/test_mitochondria_m2_wdl.metadata +sudo java -jar $CROMWELL_JAR run $WORKING_DIR/gatk/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl -i $WORKING_DIR/gatk/scripts/m2_cromwell_tests/test_mitochondria_m2_wdl_mod.json -m $WORKING_DIR/test_mitochondria_m2_wdl.metadata diff --git a/scripts/m2_cromwell_tests/test_mitochondria_m2_wdl.json b/scripts/m2_cromwell_tests/test_mitochondria_m2_wdl.json index f8bbb6297df..d3a4be36769 100644 --- a/scripts/m2_cromwell_tests/test_mitochondria_m2_wdl.json +++ b/scripts/m2_cromwell_tests/test_mitochondria_m2_wdl.json @@ -1,4 +1,5 @@ { + "MitochondriaPipeline.gatk_docker_override": "__GATK_DOCKER__", "MitochondriaPipeline.wgs_aligned_input_bam_or_cram": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/NA12878.alignedHg38.duplicateMarked.baseRealigned.bam", "MitochondriaPipeline.wgs_aligned_input_bam_or_cram_index": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/NA12878.alignedHg38.duplicateMarked.baseRealigned.bam.bai", "MitochondriaPipeline.autosomal_coverage": 30, diff --git a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl index abe09aa597c..c6b4feef54c 100644 --- a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl +++ b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl @@ -39,6 +39,7 @@ workflow AlignAndCall { File shift_back_chain File? gatk_override + String? gatk_docker_override String? m2_extra_args String? m2_filter_extra_args Float? 
vaf_filter_threshold @@ -109,6 +110,7 @@ workflow AlignAndCall { ref_dict = mt_dict, compress = compress_output_vcf, gatk_override = gatk_override, + gatk_docker_override = gatk_docker_override, # Everything is called except the control region. m2_extra_args = select_first([m2_extra_args, ""]) + " -L chrM:576-16024 ", mem = M2_mem, @@ -124,6 +126,7 @@ workflow AlignAndCall { ref_dict = mt_shifted_dict, compress = compress_output_vcf, gatk_override = gatk_override, + gatk_docker_override = gatk_docker_override, # Everything is called except the control region. m2_extra_args = select_first([m2_extra_args, ""]) + " -L chrM:8025-9144 ", mem = M2_mem, @@ -146,6 +149,7 @@ workflow AlignAndCall { shifted_stats = CallShiftedMt.stats, non_shifted_stats = CallMt.stats, gatk_override = gatk_override, + gatk_docker_override = gatk_docker_override, preemptible_tries = preemptible_tries } @@ -160,6 +164,7 @@ workflow AlignAndCall { ref_dict = mt_dict, compress = compress_output_vcf, gatk_override = gatk_override, + gatk_docker_override = gatk_docker_override, m2_extra_filtering_args = m2_filter_extra_args, max_alt_allele_count = 4, vaf_filter_threshold = 0, @@ -177,7 +182,8 @@ workflow AlignAndCall { ref_fai = mt_fasta_index, ref_dict = mt_dict, filtered_vcf = InitialFilter.filtered_vcf, - gatk_override = gatk_override + gatk_override = gatk_override, + gatk_docker_override = gatk_docker_override } call GetContamination { @@ -202,6 +208,7 @@ workflow AlignAndCall { ref_dict = mt_dict, compress = compress_output_vcf, gatk_override = gatk_override, + gatk_docker_override = gatk_docker_override, m2_extra_filtering_args = m2_filter_extra_args, max_alt_allele_count = 4, vaf_filter_threshold = vaf_filter_threshold, @@ -220,6 +227,7 @@ workflow AlignAndCall { ref_dict = mt_dict, autosomal_coverage = autosomal_coverage, gatk_override = gatk_override, + gatk_docker_override = gatk_docker_override, compress = compress_output_vcf, preemptible_tries = preemptible_tries } @@ -235,6 +243,7 @@ 
workflow AlignAndCall { ref_dict = mt_dict, max_low_het_sites = max_low_het_sites, gatk_override = gatk_override, + gatk_docker_override = gatk_docker_override, compress = compress_output_vcf, preemptible_tries = preemptible_tries } @@ -428,6 +437,7 @@ task M2 { Boolean compress File? gatk_override # runtime + String? gatk_docker_override Int? mem Int? preemptible_tries } @@ -469,7 +479,7 @@ task M2 { --max-mnp-distance 0 >>> runtime { - docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + docker: select_first([gatk_docker_override, "us.gcr.io/broad-gatk/gatk:4.1.1.0"]) memory: machine_mem + " MB" disks: "local-disk " + disk_size + " HDD" preemptible: select_first([preemptible_tries, 5]) @@ -511,6 +521,7 @@ task Filter { File blacklisted_sites_index File? gatk_override + String? gatk_docker_override # runtime Int? preemptible_tries @@ -557,7 +568,7 @@ task Filter { >>> runtime { - docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + docker: select_first([gatk_docker_override, "us.gcr.io/broad-gatk/gatk:4.1.1.0"]) memory: "4 MB" disks: "local-disk " + disk_size + " HDD" preemptible: select_first([preemptible_tries, 5]) @@ -576,6 +587,7 @@ task MergeStats { File non_shifted_stats Int? preemptible_tries File? gatk_override + String? gatk_docker_override } command{ @@ -589,7 +601,7 @@ task MergeStats { File stats = "raw.combined.stats" } runtime { - docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + docker: select_first([gatk_docker_override, "us.gcr.io/broad-gatk/gatk:4.1.1.0"]) memory: "3 MB" disks: "local-disk 20 HDD" preemptible: select_first([preemptible_tries, 5]) @@ -604,6 +616,7 @@ task SplitMultiAllelicsAndRemoveNonPassSites { File filtered_vcf Int? preemptible_tries File? gatk_override + String? 
gatk_docker_override } command { @@ -627,7 +640,7 @@ task SplitMultiAllelicsAndRemoveNonPassSites { File vcf_for_haplochecker = "splitAndPassOnly.vcf" } runtime { - docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + docker: select_first([gatk_docker_override, "us.gcr.io/broad-gatk/gatk:4.1.1.0"]) memory: "3 MB" disks: "local-disk 20 HDD" preemptible: select_first([preemptible_tries, 5]) @@ -643,6 +656,7 @@ task FilterNuMTs { Float? autosomal_coverage Int? preemptible_tries File? gatk_override + String? gatk_docker_override Boolean compress } @@ -669,7 +683,7 @@ task FilterNuMTs { File numt_filtered_vcf_idx = "~{output_vcf_index}" } runtime { - docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + docker: select_first([gatk_docker_override, "us.gcr.io/broad-gatk/gatk:4.1.1.0"]) memory: "3 MB" disks: "local-disk 20 HDD" preemptible: select_first([preemptible_tries, 5]) @@ -685,6 +699,7 @@ task FilterLowHetSites { Int? max_low_het_sites Int? preemptible_tries File? gatk_override + String? gatk_docker_override Boolean compress } @@ -708,7 +723,7 @@ task FilterLowHetSites { File final_filtered_vcf_idx = "~{output_vcf_index}" } runtime { - docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + docker: select_first([gatk_docker_override, "us.gcr.io/broad-gatk/gatk:4.1.1.0"]) memory: "3 MB" disks: "local-disk 20 HDD" preemptible: select_first([preemptible_tries, 5]) diff --git a/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl b/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl index 6124bcc2c06..a053fdb1b9f 100644 --- a/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl +++ b/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl @@ -55,6 +55,7 @@ workflow MitochondriaPipeline { String? requester_pays_project File? gatk_override + String? gatk_docker_override String? m2_extra_args String? m2_filter_extra_args Float? 
vaf_filter_threshold @@ -87,6 +88,7 @@ workflow MitochondriaPipeline { ref_dict = ref_dict, requester_pays_project = requester_pays_project, gatk_override = gatk_override, + gatk_docker_override = gatk_docker_override, preemptible_tries = preemptible_tries } @@ -121,6 +123,7 @@ workflow MitochondriaPipeline { mt_shifted_sa = mt_shifted_sa, shift_back_chain = shift_back_chain, gatk_override = gatk_override, + gatk_docker_override = gatk_docker_override, m2_extra_args = m2_extra_args, m2_filter_extra_args = m2_filter_extra_args, vaf_filter_threshold = vaf_filter_threshold, @@ -158,6 +161,7 @@ workflow MitochondriaPipeline { ref_fasta_index = mt_fasta_index, ref_dict = mt_dict, gatk_override = gatk_override, + gatk_docker_override = gatk_docker_override, preemptible_tries = preemptible_tries } @@ -194,6 +198,7 @@ task SubsetBamToChrM { File? ref_dict File? gatk_override + String? gatk_docker_override # runtime Int? preemptible_tries @@ -229,7 +234,7 @@ task SubsetBamToChrM { runtime { memory: "3 GB" disks: "local-disk " + disk_size + " HDD" - docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + docker: select_first([gatk_docker_override, "us.gcr.io/broad-gatk/gatk:4.1.1.0"]) preemptible: select_first([preemptible_tries, 5]) } output { @@ -364,6 +369,7 @@ task SplitMultiAllelicSites { File input_vcf Int? preemptible_tries File? gatk_override + String? 
gatk_docker_override } command { @@ -382,7 +388,7 @@ task SplitMultiAllelicSites { File split_vcf_index = "split.vcf.idx" } runtime { - docker: "us.gcr.io/broad-gatk/gatk:4.1.1.0" + docker: select_first([gatk_docker_override, "us.gcr.io/broad-gatk/gatk:4.1.1.0"]) memory: "3 MB" disks: "local-disk 20 HDD" preemptible: select_first([preemptible_tries, 5]) From 4dcd0885944bbc7528ab7b9703091b5370afe76a Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Fri, 3 Apr 2020 16:11:33 -0400 Subject: [PATCH 78/85] fix path --- scripts/m2_cromwell_tests/run_m2_wdl.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/m2_cromwell_tests/run_m2_wdl.sh b/scripts/m2_cromwell_tests/run_m2_wdl.sh index 4e5d1ec799e..f30f28dfcf9 100644 --- a/scripts/m2_cromwell_tests/run_m2_wdl.sh +++ b/scripts/m2_cromwell_tests/run_m2_wdl.sh @@ -55,4 +55,4 @@ sudo java -jar $CROMWELL_JAR run $WORKING_DIR/gatk/scripts/mutect2_wdl/mutect2_m echo "Running Mitochondria M2 WDL through cromwell" ln -fs $WORKING_DIR/gatk/scripts/mitochondria_m2_wdl/AlignAndCall.wdl ln -fs $WORKING_DIR/gatk/scripts/mitochondria_m2_wdl/AlignmentPipeline.wdl -sudo java -jar $CROMWELL_JAR run $WORKING_DIR/gatk/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl -i $WORKING_DIR/gatk/scripts/m2_cromwell_tests/test_mitochondria_m2_wdl_mod.json -m $WORKING_DIR/test_mitochondria_m2_wdl.metadata +sudo java -jar $CROMWELL_JAR run $WORKING_DIR/gatk/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl -i $WORKING_DIR/test_mitochondria_m2_wdl_mod.json -m $WORKING_DIR/test_mitochondria_m2_wdl.metadata From d6b12294d6784347df60894c3112f9c64b03d1ed Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 6 Apr 2020 13:37:36 -0400 Subject: [PATCH 79/85] fix path again --- scripts/mitochondria_m2_wdl/AlignAndCall.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl index c6b4feef54c..5244865911a 100644 --- 
a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl +++ b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl @@ -1,6 +1,6 @@ version 1.0 -import "AlignmentPipeline" as AlignAndMarkDuplicates +import "AlignmentPipeline.wdl" as AlignAndMarkDuplicates #import "https://api.firecloud.org/ga4gh/v1/tools/mitochondria:AlignmentPipeline/versions/1/plain-WDL/descriptor" as AlignAndMarkDuplicates From e60ed86feb7657a54b65db5fd528fa9f0f725e10 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Mon, 6 Apr 2020 16:25:55 -0400 Subject: [PATCH 80/85] remove blacklisted shifted --- scripts/m2_cromwell_tests/test_mitochondria_m2_wdl.json | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/m2_cromwell_tests/test_mitochondria_m2_wdl.json b/scripts/m2_cromwell_tests/test_mitochondria_m2_wdl.json index d3a4be36769..dbc4ae3595e 100644 --- a/scripts/m2_cromwell_tests/test_mitochondria_m2_wdl.json +++ b/scripts/m2_cromwell_tests/test_mitochondria_m2_wdl.json @@ -22,8 +22,6 @@ "MitochondriaPipeline.mt_shifted_bwt": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/mitochondria_references/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.bwt", "MitochondriaPipeline.mt_shifted_pac": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/mitochondria_references/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.pac", "MitochondriaPipeline.mt_shifted_sa": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/mitochondria_references/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.sa", - "MitochondriaPipeline.blacklisted_sites_shifted": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/mitochondria_references/blacklist_sites.hg38.chrM.shifted_by_8000_bases.bed", - "MitochondriaPipeline.blacklisted_sites_shifted_index": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/mitochondria_references/blacklist_sites.hg38.chrM.shifted_by_8000_bases.bed.idx", "MitochondriaPipeline.shift_back_chain": 
"/home/travis/build/broadinstitute/gatk/src/test/resources/large/mitochondria_references/ShiftBack.chain", "MitochondriaPipeline.control_region_shifted_reference_interval_list": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/mitochondria_references/control_region_shifted.chrM.interval_list", "MitochondriaPipeline.non_control_region_interval_list": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/mitochondria_references/non_control_region.chrM.interval_list" From 717d87fab4fb938aa44737019f62a4ae7fed1172 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 7 Apr 2020 15:33:00 -0400 Subject: [PATCH 81/85] add error checking for contam file format --- scripts/mitochondria_m2_wdl/AlignAndCall.wdl | 21 +++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl index 5244865911a..03126fabb3f 100644 --- a/scripts/mitochondria_m2_wdl/AlignAndCall.wdl +++ b/scripts/mitochondria_m2_wdl/AlignAndCall.wdl @@ -289,6 +289,25 @@ task GetContamination { java -jar /usr/mtdnaserver/haplocheckCLI.jar "${PARENT_DIR}" sed 's/\"//g' output > output-noquotes + + grep "SampleID" output-noquotes > headers + FORMAT_ERROR="Bad contamination file format" + if [ `awk '{print $2}' headers` != "Contamination" ]; then + echo $FORMAT_ERROR; exit 1 + fi + if [ `awk '{print $6}' headers` != "HgMajor" ]; then + echo $FORMAT_ERROR; exit 1 + fi + if [ `awk '{print $8}' headers` != "HgMinor" ]; then + echo $FORMAT_ERROR; exit 1 + fi + if [ `awk '{print $14}' headers` != "MeanHetLevelMajor" ]; then + echo $FORMAT_ERROR; exit 1 + fi + if [ `awk '{print $15}' headers` != "MeanHetLevelMinor" ]; then + echo $FORMAT_ERROR; exit 1 + fi + grep -v "SampleID" output-noquotes > output-data awk '{print $2}' output-data > contamination.txt awk '{print $6}' output-data > major_hg.txt @@ -728,4 +747,4 @@ task FilterLowHetSites { disks: "local-disk 20 HDD" preemptible: 
select_first([preemptible_tries, 5]) } -} \ No newline at end of file +} From 69cca57628a66e36324ca4062b40c98c80e5a728 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 14 Apr 2020 10:47:27 -0400 Subject: [PATCH 82/85] minor refactor --- .../walkers/mutect/clustering/SomaticClusteringModel.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java index 14648677d45..39daeefbfca 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/clustering/SomaticClusteringModel.java @@ -98,10 +98,10 @@ public SomaticClusteringModel(final M2FiltersArgumentCollection MTFAC, final Lis * @param vc the variant context the data apply to */ public void record(int[] tumorADs, final double[] tumorLogOdds, final List artifactProbabilities, final List nonSomaticProbabilities, final VariantContext vc) { + // get all alt allele indexes for symbolic alleles + List symIndexes = new IndexRange(0, vc.getNAlleles()-1).filter(n -> vc.getAlternateAllele(n).isSymbolic()); + // set tumorAD to 0 for symbolic alleles so it won't contribute to overall AD - List symbolicAlleles = vc.getAlternateAlleles().stream().filter(allele -> allele.isSymbolic()).collect(Collectors.toList()); - // convert allele index to alt allele index - List symIndexes = vc.getAlleleIndices(symbolicAlleles).stream().map(i -> i-1).collect(Collectors.toList()); symIndexes.forEach(i -> tumorADs[i] = 0); final int totalAD = (int) MathUtils.sum(tumorADs); From f51ba1037f969405a808501f567c22c0b8957b9c Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Wed, 15 Apr 2020 09:26:06 -0400 Subject: [PATCH 83/85] update example inputs to use broad public bucket for refs --- 
.../ExampleInputsMitochondriaPipeline.json | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/scripts/mitochondria_m2_wdl/ExampleInputsMitochondriaPipeline.json b/scripts/mitochondria_m2_wdl/ExampleInputsMitochondriaPipeline.json index 8fe8d088054..b1d14aaddc7 100644 --- a/scripts/mitochondria_m2_wdl/ExampleInputsMitochondriaPipeline.json +++ b/scripts/mitochondria_m2_wdl/ExampleInputsMitochondriaPipeline.json @@ -3,25 +3,25 @@ "MitochondriaPipeline.wgs_aligned_input_bam_or_cram_index": "input_bam_index_here", "MitochondriaPipeline.autosomal_coverage": autosomal_median_coverage_here, "MitochondriaPipeline.sample_name": "SAMPLE_NAME", - "MitochondriaPipeline.mt_dict": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.dict", - "MitochondriaPipeline.mt_fasta": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta", - "MitochondriaPipeline.mt_fasta_index": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.fai", - "MitochondriaPipeline.mt_amb": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.amb", - "MitochondriaPipeline.mt_ann": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.ann", - "MitochondriaPipeline.mt_bwt": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.bwt", - "MitochondriaPipeline.mt_pac": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.pac", - "MitochondriaPipeline.mt_sa": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.sa", - "MitochondriaPipeline.blacklisted_sites": "gs://broad-references/hg38/v0/chrM/blacklist_sites.hg38.chrM.bed", - "MitochondriaPipeline.blacklisted_sites_index": "gs://broad-references/hg38/v0/chrM/blacklist_sites.hg38.chrM.bed.idx", - "MitochondriaPipeline.mt_shifted_dict": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.dict", - "MitochondriaPipeline.mt_shifted_fasta": 
"gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta", - "MitochondriaPipeline.mt_shifted_fasta_index": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.fai", - "MitochondriaPipeline.mt_shifted_amb": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.amb", - "MitochondriaPipeline.mt_shifted_ann": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.ann", - "MitochondriaPipeline.mt_shifted_bwt": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.bwt", - "MitochondriaPipeline.mt_shifted_pac": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.pac", - "MitochondriaPipeline.mt_shifted_sa": "gs://broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.sa", - "MitochondriaPipeline.shift_back_chain": "gs://broad-references/hg38/v0/chrM/ShiftBack.chain", - "MitochondriaPipeline.control_region_shifted_reference_interval_list": "gs://broad-references/hg38/v0/chrM/control_region_shifted.chrM.interval_list", - "MitochondriaPipeline.non_control_region_interval_list": "gs://broad-references/hg38/v0/chrM/non_control_region.chrM.interval_list" + "MitochondriaPipeline.mt_dict": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.dict", + "MitochondriaPipeline.mt_fasta": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta", + "MitochondriaPipeline.mt_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.fai", + "MitochondriaPipeline.mt_amb": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.amb", + "MitochondriaPipeline.mt_ann": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.ann", + "MitochondriaPipeline.mt_bwt": 
"gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.bwt", + "MitochondriaPipeline.mt_pac": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.pac", + "MitochondriaPipeline.mt_sa": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.fasta.sa", + "MitochondriaPipeline.blacklisted_sites": "gs://gcp-public-data--broad-references/hg38/v0/chrM/blacklist_sites.hg38.chrM.bed", + "MitochondriaPipeline.blacklisted_sites_index": "gs://gcp-public-data--broad-references/hg38/v0/chrM/blacklist_sites.hg38.chrM.bed.idx", + "MitochondriaPipeline.mt_shifted_dict": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.dict", + "MitochondriaPipeline.mt_shifted_fasta": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta", + "MitochondriaPipeline.mt_shifted_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.fai", + "MitochondriaPipeline.mt_shifted_amb": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.amb", + "MitochondriaPipeline.mt_shifted_ann": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.ann", + "MitochondriaPipeline.mt_shifted_bwt": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.bwt", + "MitochondriaPipeline.mt_shifted_pac": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.pac", + "MitochondriaPipeline.mt_shifted_sa": "gs://gcp-public-data--broad-references/hg38/v0/chrM/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta.sa", + "MitochondriaPipeline.shift_back_chain": "gs://gcp-public-data--broad-references/hg38/v0/chrM/ShiftBack.chain", + 
"MitochondriaPipeline.control_region_shifted_reference_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/chrM/control_region_shifted.chrM.interval_list", + "MitochondriaPipeline.non_control_region_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/chrM/non_control_region.chrM.interval_list" } From 59f3dc9a2fc244fe4f0e7e64ee69055d79bc0018 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Thu, 16 Apr 2020 15:52:33 -0400 Subject: [PATCH 84/85] doc updates --- .../hellbender/tools/walkers/annotator/UniqueAltReadCount.java | 2 +- .../hellbender/tools/walkers/filters/VariantFiltration.java | 2 +- .../tools/walkers/mutect/filtering/Mutect2AlleleFilter.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/UniqueAltReadCount.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/UniqueAltReadCount.java index c7ed7efc6e1..253814afc5e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/UniqueAltReadCount.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/UniqueAltReadCount.java @@ -35,7 +35,7 @@ * *

    This annotation does not require or use any BAM file duplicate flags or UMI information, just the read alignments.

    */ -@DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Number of non-duplicate-insert ALT reads (UNIQ_ALT_READ_COUNT)") +@DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Number of non-duplicate-insert ALT reads (AS_UNIQ_ALT_READ_COUNT)") public class UniqueAltReadCount extends InfoFieldAnnotation implements AlleleSpecificAnnotation { public static final String KEY = GATKVCFConstants.AS_UNIQUE_ALT_READ_SET_COUNT_KEY; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java index d7f8f9520a9..409741afcc0 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java @@ -236,7 +236,7 @@ public final class VariantFiltration extends VariantWalker { @Argument(fullName=NO_CALL_GTS_LONG_NAME, optional=true, doc="Set filtered genotypes to no-call") public boolean setFilteredGenotypesToNocall = false; - @Argument(fullName=ALLELE_SPECIFIC_LONG_NAME, optional=true, doc="Set mask at the allele level") + @Argument(fullName=ALLELE_SPECIFIC_LONG_NAME, optional=true, doc="Set mask at the allele level. 
This option is not compatible with clustering.") public boolean applyForAllele = false; // JEXL expressions for the filters diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index 1a36376b246..81a1d10b969 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -73,7 +73,7 @@ private static LinkedHashMap> combineDataByAllele(final Link /** - * + * Returns a list for each alternate allele which is the probability that the allele should be filtered out. * @param vc * @param filteringEngine * @param referenceContext From 33c2942b7fd44d7c6d70947d8629da6f0dd8cc51 Mon Sep 17 00:00:00 2001 From: Andrea Haessly Date: Tue, 21 Apr 2020 17:00:22 -0400 Subject: [PATCH 85/85] update documentation --- .../tools/walkers/mutect/filtering/Mutect2AlleleFilter.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java index 81a1d10b969..b4606335ac7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/filtering/Mutect2AlleleFilter.java @@ -73,7 +73,8 @@ private static LinkedHashMap> combineDataByAllele(final Link /** - * Returns a list for each alternate allele which is the probability that the allele should be filtered out. + * Returns a list of probabilities, one for each alternate allele which is the probability that the allele should be filtered out. 
+ * An empty list is returned when the filter is not or cannot be evaluated. * @param vc * @param filteringEngine * @param referenceContext