Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixed out-of-bounds error in CountNs annotation #6355

Merged
merged 1 commit into from
Jan 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.Utils;
Expand Down Expand Up @@ -52,8 +54,21 @@ public List<VCFInfoHeaderLine> getDescriptions() {
@Override
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.N_COUNT_KEY); }

private Boolean doesReadHaveN(final GATKRead read, final VariantContext vc) {
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL, true);
return ( offset != ReadUtils.CLIPPING_GOAL_NOT_REACHED && !AlignmentUtils.isInsideDeletion(read.getCigar(), offset) && read.getBase(offset) == 'N');
@VisibleForTesting
static Boolean doesReadHaveN(final GATKRead read, final VariantContext vc) {

if (vc.getStart() < read.getStart() || read.getEnd() < vc.getStart()) {
return false;
}

final Pair<Integer, Boolean> offsetAndInsideDeletion = ReadUtils.getReadCoordinateForReferenceCoordinate(read, vc.getStart(), true);

if (offsetAndInsideDeletion.getRight()) {
return false;
} else {
final int offset = offsetAndInsideDeletion.getLeft();
return !(offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED || offset < 0 || offset >= read.getLength() || AlignmentUtils.isInsideDeletion(read.getCigar(), offset))
&& read.getBase(offset) == 'N';
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public static void adjustQualsOfOverlappingPairedFragments(final Pair<GATKRead,
return;
}

final Pair<Integer, Boolean> offset = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getStart());
final Pair<Integer, Boolean> offset = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getStart(), false);
final int firstReadStop = (offset.getRight() ? offset.getLeft() + 1 : offset.getLeft());
final int numOverlappingBases = Math.min(firstRead.getLength() - firstReadStop, secondRead.getLength());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -701,26 +701,6 @@ public static int getReadCoordinateForReferenceCoordinate(final GATKRead read, f
return getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), refCoord, tail, false);
}

/**
* Returns the read coordinate corresponding to the requested reference coordinate.
*
* WARNING: if the requested reference coordinate happens to fall inside or just before a deletion (or skipped region) in the read, this function
* will return the last read base before the deletion (or skipped region). This function returns a
* Pair(int readCoord, boolean fallsInsideOrJustBeforeDeletionOrSkippedRegion) so you can choose which readCoordinate to use when faced with
* a deletion (or skipped region).
*
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail) instead to get a
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
* behavior to your needs.
*
* @param read
* @param refCoord the requested reference coordinate
* @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
*/
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(GATKRead read, int refCoord) {
return getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), refCoord, false);
}

/**
* Returns the read coordinate corresponding to the requested reference coordinate.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.apache.spark.sql.catalyst.expressions.aggregate.Count;
import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.testng.Assert;
import org.testng.annotations.Test;

import static org.testng.Assert.*;

public class CountNsUnitTest {

@Test
public void testDoesReadHaveN() {
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
final String contig = "chr1";

final int pos = 100;
final VariantContext vc = new VariantContextBuilder().chr(contig).start(pos).stop(pos).alleles("A","C").make();

final GATKRead nonOverlappingUpstreamRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 96, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
Assert.assertFalse(CountNs.doesReadHaveN(nonOverlappingUpstreamRead, vc));

final GATKRead nonOverlappingDownstreamRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 101, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
Assert.assertFalse(CountNs.doesReadHaveN(nonOverlappingDownstreamRead, vc));

final GATKRead spanningDeletionRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 95, new byte[] {'N','N','N','N','N','N'}, new byte[] {20,20,20,20,20,20}, "3M10D3M");
Assert.assertFalse(CountNs.doesReadHaveN(spanningDeletionRead, vc));

final GATKRead notN = ArtificialReadUtils.createArtificialRead(header, "read", 0, 99, new byte[] {'A','C','G'}, new byte[] {20,20,20}, "3M");
Assert.assertFalse(CountNs.doesReadHaveN(notN, vc));

final GATKRead yesN = ArtificialReadUtils.createArtificialRead(header, "read", 0, 99, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
Assert.assertTrue(CountNs.doesReadHaveN(yesN, vc));

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ public void testCalcNIndelInformativeReads(final String readBases, final String
final GATKRead readNoCache = ArtificialReadUtils.createArtificialRead(readBases.getBytes(), quals, cigar);
final SimpleInterval loc = new SimpleInterval("20", i + 1 + readStartIntoRef, i + 1 + readStartIntoRef);
final ReadPileup pileupCache = new ReadPileup(loc, Collections.singletonList(readCache), readCoordinateForReferenceCoordinate.getKey());
final ReadPileup pileupNoCache = new ReadPileup(loc, Collections.singletonList(readNoCache), ReadUtils.getReadCoordinateForReferenceCoordinate(readNoCache, readNoCache.getStart() + i).getKey());
final ReadPileup pileupNoCache = new ReadPileup(loc, Collections.singletonList(readNoCache), ReadUtils.getReadCoordinateForReferenceCoordinate(readNoCache, readNoCache.getStart() + i, false).getKey());
final int actualCache = model.calcNReadsWithNoPlausibleIndelsReads(pileupCache, i + readStartIntoRef, ref.getBytes(), maxIndelSize);
final int actualNoCache = model.calcNReadsWithNoPlausibleIndelsReads(pileupNoCache, i + readStartIntoRef, ref.getBytes(), maxIndelSize);
Assert.assertEquals(actualCache, (int)expected.get(i), "cached result failed at position " + i);
Expand Down