Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cap likelihoods symmetrically #6196

Merged
merged 2 commits into from
Oct 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,8 @@ public static CachingIndexedFastaSequenceFile createReferenceReader(final String
* @return never {@code null}.
*/
public static ReadLikelihoodCalculationEngine createLikelihoodCalculationEngine(final LikelihoodEngineArgumentCollection likelihoodArgs) {
final double log10GlobalReadMismappingRate = likelihoodArgs.phredScaledGlobalReadMismappingRate < 0 ? - Double.MAX_VALUE
//AlleleLikelihoods::normalizeLikelihoods uses Double.NEGATIVE_INFINITY as a flag to disable capping
final double log10GlobalReadMismappingRate = likelihoodArgs.phredScaledGlobalReadMismappingRate < 0 ? Double.NEGATIVE_INFINITY
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docs say you can turn off the cap by setting this value negative, but it wasn't working before because subsequent methods were looking for -Inf.

: QualityUtils.qualToErrorProbLog10(likelihoodArgs.phredScaledGlobalReadMismappingRate);

switch ( likelihoodArgs.likelihoodEngineImplementation) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -352,9 +352,10 @@ public void normalizeLikelihoods(final double maximumLikelihoodDifferenceCap) {
private void normalizeLikelihoodsPerEvidence(final double maximumBestAltLikelihoodDifference,
final double[][] sampleValues, final int sampleIndex, final int evidenceIndex) {

final BestAllele bestAlternativeAllele = searchBestAllele(sampleIndex,evidenceIndex,false);
//allow the best allele to be the reference because asymmetry leads to strange artifacts like het calls with >90% alt reads
final BestAllele bestAllele = searchBestAllele(sampleIndex,evidenceIndex,true);

final double worstLikelihoodCap = bestAlternativeAllele.likelihood + maximumBestAltLikelihoodDifference;
final double worstLikelihoodCap = bestAllele.likelihood + maximumBestAltLikelihoodDifference;

final int alleleCount = alleles.numberOfAlleles();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@
import java.util.stream.Collectors;

public class CombineGVCFsIntegrationTest extends CommandLineProgramTest {
// If true, update the expected outputs in tests that assert an exact match vs. prior output,
// instead of actually running the tests. Can be used with "./gradlew test -Dtest.single=HaplotypeCallerIntegrationTest"
// to update all of the exact-match tests at once. After you do this, you should look at the
// diffs in the new expected outputs in git to confirm that they are consistent with expectations.
public static final boolean UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS = false;

private static final List<String> NO_EXTRA_ARGS = Collections.emptyList();
private static final List<String> ATTRIBUTES_TO_IGNORE = Arrays.asList(
"RAW_MQ", //MQ data format and key have changed since GATK3
Expand All @@ -49,6 +55,14 @@ private static <T> void assertForEachElementInLists(final List<T> actual, final
}
}

/*
* Make sure that someone didn't leave the UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS toggle turned on
*/
@Test
public void assertThatExpectedOutputUpdateToggleIsDisabled() {
    // Guard test: fails the suite if the expected-output regeneration toggle
    // was accidentally committed as true (which would silently skip the
    // exact-match assertions in the other tests).
    final boolean toggleEnabled = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS;
    Assert.assertFalse(toggleEnabled, "The toggle to update expected outputs should not be left enabled");
}

@DataProvider
public Object[][] gvcfsToCombine() {
return new Object[][]{
Expand Down Expand Up @@ -134,8 +148,8 @@ public void compareToGATK3(File[] inputs, File outputFile, List<String> extraArg
}

@Test(dataProvider = "gvcfsToCombine")
public void compareToGATK3ExpectedResults(File[] inputs, File outputFile, List<String> extraArgs, String reference) throws IOException, NoSuchAlgorithmException {
assertVariantContextsMatch(Arrays.asList(inputs), outputFile, extraArgs, reference, ATTRIBUTES_TO_IGNORE);
public void compareToExpectedResults(File[] inputs, File expected, List<String> extraArgs, String reference) throws IOException, NoSuchAlgorithmException {
assertVariantContextsMatch(Arrays.asList(inputs), expected, extraArgs, reference, ATTRIBUTES_TO_IGNORE);
}

public static void runProcess(ProcessController processController, String[] command) {
Expand All @@ -161,7 +175,7 @@ public void runCombineGVCFSandAssertSomething(List<File> inputs, File expected,

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(reference))
.addOutput(output);
.addOutput(UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expected : output);
for (File input: inputs) {
args.addArgument("V", input.getAbsolutePath());
}
Expand All @@ -173,9 +187,11 @@ public void runCombineGVCFSandAssertSomething(List<File> inputs, File expected,
Utils.resetRandomGenerator();
runCommandLine(args);

final List<VariantContext> expectedVC = getVariantContexts(expected);
final List<VariantContext> actualVC = getVariantContexts(output);
assertForEachElementInLists(actualVC, expectedVC, assertion);
if (!UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS) {
final List<VariantContext> expectedVC = getVariantContexts(expected);
final List<VariantContext> actualVC = getVariantContexts(output);
assertForEachElementInLists(actualVC, expectedVC, assertion);
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
Expand Down Expand Up @@ -1220,6 +1221,35 @@ public void testMaxAlternateAlleles(final String bam, final String reference, fi
}
}

@Test
public void testContaminatedHomVarDeletions() {
    final String bam = toolsTestDir + "haplotypecaller/deletion_sample.snippet.bam";
    final String intervals = "chr3:46373452";

    final File outputContaminatedHomVarDeletions = createTempFile("testContaminatedHomVarDeletions", ".vcf");

    final ArgumentsBuilder args = new ArgumentsBuilder().addInput(new File(bam))
            .addReference(hg38Reference)
            .addInterval(new SimpleInterval(intervals))
            .addOutput(outputContaminatedHomVarDeletions)
            .addNumericArgument(IntervalArgumentCollection.INTERVAL_PADDING_LONG_NAME, 50);
    runCommandLine(args);

    final List<VariantContext> vcs = VariantContextTestUtils.getVariantContexts(outputContaminatedHomVarDeletions);

    // Check the known homozygous deletion for the correct genotype: any site
    // whose allele depths are overwhelmingly alt (>90%) must be called hom-var.
    for (final VariantContext vc : vcs) {
        final Genotype gt = vc.getGenotype(0);
        if (gt.hasAD()) {
            final int[] ads = gt.getAD();
            // BUG FIX: the original computed ads[1] / (ads[0] + ads[1]) in pure
            // int arithmetic, which truncates to 0 (or 1 only when ads[0] == 0),
            // so the > 0.9 check almost never fired and a zero depth crashed with
            // ArithmeticException. Promote to double before dividing; a 0/0 depth
            // now yields NaN, which correctly fails the > 0.9 comparison.
            final double alleleBalance = (double) ads[1] / (ads[0] + ads[1]);
            if (alleleBalance > 0.9) {
                Assert.assertTrue(gt.isHomVar());
            }
        }
    }
}

/**
* Helper method for testMaxAlternateAlleles
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,19 +282,18 @@ public void testNormalizeCapWorstLK(final String[] samples, final Allele[] allel
for (int a = 0; a < numberOfAlleles; a++)
newLikelihoods[s][a] = new double[sampleReadCount];
for (int r = 0; r < sampleReadCount; r++) {
double bestAltLk = Double.NEGATIVE_INFINITY;
double bestAlleleLk = Double.NEGATIVE_INFINITY;
//best likelihood can be alt OR reference
for (int a = 0; a < numberOfAlleles; a++) {
if (alleles[a].isReference())
continue;
bestAltLk = Math.max(bestAltLk, originalLikelihoods[s][a][r]);
bestAlleleLk = Math.max(bestAlleleLk, originalLikelihoods[s][a][r]);
}
if (bestAltLk == Double.NEGATIVE_INFINITY)
if (bestAlleleLk == Double.NEGATIVE_INFINITY)
for (int a = 0; a < numberOfAlleles; a++) {
newLikelihoods[s][a][r] = originalLikelihoods[s][a][r];
}
else
for (int a = 0; a < numberOfAlleles; a++) {
newLikelihoods[s][a][r] = Math.max(originalLikelihoods[s][a][r], bestAltLk - 0.001);
newLikelihoods[s][a][r] = Math.max(originalLikelihoods[s][a][r], bestAlleleLk - 0.001);
}
}
}
Expand Down
Binary file not shown.
Binary file not shown.
Loading