Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improved checking for upstream deletions in GenotypingEngine #6429

Merged
merged 3 commits into from
Feb 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
package org.broadinstitute.hellbender.tools.walkers.genotyper;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.util.Locatable;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.commons.math3.special.Gamma;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.*;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerUtils;
Expand Down Expand Up @@ -38,7 +37,10 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl

protected final SampleList samples;

private final List<SimpleInterval> upstreamDeletionsLoc = new LinkedList<>();
// the top of the queue is the upstream deletion that ends first
// note that we can get away with ordering just by the end and not the contig as long as we preserve the invariant
// that everything in this queue belongs to the same contig
private final PriorityQueue<Locatable> upstreamDeletionsLoc = new PriorityQueue<>(Comparator.comparingInt(Locatable::getEnd));

private final boolean doAlleleSpecificCalcs;

Expand Down Expand Up @@ -154,6 +156,8 @@ && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles) && givenA

// start constructing the resulting VC
final List<Allele> outputAlleles = outputAlternativeAlleles.outputAlleles(vc.getReference());
recordDeletions(vc, outputAlleles);

final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), vc.getContig(), vc.getStart(), vc.getEnd(), outputAlleles);

builder.log10PError(log10Confidence);
Expand Down Expand Up @@ -251,7 +255,6 @@ private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult
if (toOutput) {
outputAlleles.add(allele);
mleCounts.add(afCalculationResult.getAlleleCountAtMLE(allele));
recordDeletion(referenceAlleleSize - allele.length(), vc);
}
}
}
Expand All @@ -264,18 +267,27 @@ void clearUpstreamDeletionsLoc() {
}

/**
* Record deletion to keep
* Add deletions to a list.
* Record emitted deletions in order to remove downstream spanning deletion alleles that are not covered by any emitted deletion.
* In addition to recording new deletions, this method culls previously-recorded deletions that end before the current variant
* context. This assumes that variants are traversed in order.
*
* @param deletionSize size of deletion in bases
* @param vc variant context
* @param vc VariantContext, potentially multiallelic and potentially containing one or more deletion alleles
* @param emittedAlleles The subset of the variant's alt alleles that are actually emitted
*/
void recordDeletion(final int deletionSize, final VariantContext vc) {
@VisibleForTesting
void recordDeletions(final VariantContext vc, final Collection<Allele> emittedAlleles) {
while (!upstreamDeletionsLoc.isEmpty() && (!upstreamDeletionsLoc.peek().contigsMatch(vc) || upstreamDeletionsLoc.peek().getEnd() < vc.getStart())) {
upstreamDeletionsLoc.poll();
}

for (final Allele allele : emittedAlleles) {
final int deletionSize = vc.getReference().length() - allele.length();

// In a deletion
if (deletionSize > 0) {
final SimpleInterval genomeLoc = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart() + deletionSize);
upstreamDeletionsLoc.add(genomeLoc);
// In a deletion
if (deletionSize > 0) {
final SimpleInterval genomeLoc = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart() + deletionSize);
upstreamDeletionsLoc.add(genomeLoc);
}
}
}

Expand All @@ -286,20 +298,11 @@ void recordDeletion(final int deletionSize, final VariantContext vc) {
* @return true if the location is covered by an upstream deletion, false otherwise
*/
boolean isVcCoveredByDeletion(final VariantContext vc) {
for (Iterator<SimpleInterval> it = upstreamDeletionsLoc.iterator(); it.hasNext(); ) {
final SimpleInterval loc = it.next();
if (!loc.getContig().equals(vc.getContig())) { // deletion is not on contig.
it.remove();
} else if (loc.getEnd() < vc.getStart()) { // deletion is before the start.
it.remove();
} else if (loc.getStart() == vc.getStart()) {
// ignore this deletion, the symbolic one does not make reference to it.
} else { // deletion covers.
return true;
}
}
// note: the code below seems like it's duplicating Locatable.overlaps, but here if the upstream deletion
// has the same start as the vc we don't want to count it
return !upstreamDeletionsLoc.isEmpty() && upstreamDeletionsLoc.stream()
.anyMatch(loc -> loc.getContig().equals(vc.getContig()) && loc.getStart() < vc.getStart() && vc.getStart() <= loc.getEnd());

return false;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public void init() {
final int deletionSize = refAllele.length() - altT.length();
final int start = 1;
final VariantContext deletionVC = new VariantContextBuilder("testDeletion", "1", start, start + deletionSize, allelesDel).make();
genotypingEngine.recordDeletion(deletionSize, deletionVC);
genotypingEngine.recordDeletions(deletionVC, Collections.singletonList(altT));
}

private static GenotypingEngine<?> getGenotypingEngine() {
Expand Down