Skip to content

Commit

Permalink
Fixes for tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ldgauthier committed Jul 22, 2019
1 parent 76f1681 commit ae1cd9b
Show file tree
Hide file tree
Showing 18 changed files with 98 additions and 110 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ private void initializeFeatureSources( final int featureQueryLookahead, final Co
if ( featureInput != null ) {
final Class<? extends Feature> featureType = getFeatureTypeForFeatureInputField(featureArgument.getKey());
addToFeatureSources(featureQueryLookahead, featureInput, featureType, cloudPrefetchBuffer, cloudIndexPrefetchBuffer,
toolInstance instanceof GATKTool ? ((GATKTool) toolInstance).getGenomicsDBOptions() : null);
toolInstance instanceof VariantWalker ? ((VariantWalker) toolInstance).getGenomicsDBOptions() : null);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.filters.CountingReadFilter;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBOptions;
import org.broadinstitute.hellbender.utils.SimpleInterval;

import java.nio.file.Path;
import java.util.Spliterator;

/**
Expand Down Expand Up @@ -146,12 +144,4 @@ protected final void onShutdown() {
if ( drivingVariants != null )
drivingVariants.close();
}

/**
* Does this tool need sample genotypes to be called if reading from a GenomicsDB?
* @return if false, GTs remain no-calls, as in CombineGVCFs
*/
protected boolean doGenotypeCalling() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public GnarlyGenotyperEngine(final boolean keepAllSites, final int maxAltAlleles
}

@SuppressWarnings({"unchecked", "rawtypes"})
public static VariantContext finalizeGenotype(final VariantContext variant, final VariantContextBuilder annotationDBBuilder) {
public VariantContext finalizeGenotype(final VariantContext variant, final VariantContextBuilder annotationDBBuilder) {
//GenomicsDB or Evoquer merged all the annotations, but we still need to finalize MQ and QD annotations
//return a VC with the finalized annotations and dbBuilder gets the raw annotations for the database

Expand Down Expand Up @@ -245,7 +245,7 @@ public static VariantContext finalizeGenotype(final VariantContext variant, fina
* @param vc the input variant with NON_REF
* @return a GenotypesContext
*/
private static GenotypesContext iterateOnGenotypes(final VariantContext vc, final List<Allele> targetAlleles,
private GenotypesContext iterateOnGenotypes(final VariantContext vc, final List<Allele> targetAlleles,
final Map<Allele,Integer> targetAlleleCounts, final int[] SBsum,
final boolean nonRefReturned, final boolean summarizePLs,
final int[] rawGenotypeCounts) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public GenomicsDBOptions(final Path reference) {
/**
*
* @param reference Path to a reference. May be null. Needed only for reading from GenomicsDB.
* @param callGenotypes Indicated whether GenomicsDB should return called genotypes
* @param callGenotypes Indicates whether GenomicsDB should return called genotypes
*/
public GenomicsDBOptions(final Path reference, final boolean callGenotypes, final int maxAlternateAlleles) {
this.reference = reference;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public static GenomicsDBExportConfiguration.ExportConfiguration createExportConf
//Update combine operations for GnarlyGenotyper
//Note that this MQ format is deprecated, but was used by the prototype version of ReblockGVCF
vidMapPB = updateINFOFieldCombineOperation(vidMapPB, fieldNameToIndexInVidFieldsList,
GATKVCFConstants.MAPPING_QUALITY_DEPTH, "sum");
GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED, "sum");
vidMapPB = updateINFOFieldCombineOperation(vidMapPB, fieldNameToIndexInVidFieldsList,
GATKVCFConstants.RAW_QUAL_APPROX_KEY, "sum");
vidMapPB = updateINFOFieldCombineOperation(vidMapPB, fieldNameToIndexInVidFieldsList,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,10 @@
* </pre>
*
* <h3>Caveats</h3>
* <p><ul><li>Only GenomicsDB instances can be used as input for this tool.</li>
* <li>To generate all the annotations necessary for VQSR, input variants must include the QUALapprox, VarDP and MQ_DP
* <p><ul>
* <li>This tool does not subset to the best alternate alleles and can return highly, highly multialleic variants (>1000 alts for cohorts in the 10s of thousands). Only
* GenomicsDB instances should be used as input for this tool, because GenomicsDB will drop PLs for such sites to avoid excessive vcf size.</li>
* <li>To generate all the annotations necessary for VQSR, input variants must include the QUALapprox, VarDP and MQ_DP annotations
* </ul></p>
*
* <h3>Special note on ploidy</h3>
Expand All @@ -82,6 +84,7 @@
oneLineSummary = "Perform \"quick and dirty\" joint genotyping on one or more samples pre-called with HaplotypeCaller",
programGroup = ShortVariantDiscoveryProgramGroup.class)
@DocumentedFeature
@BetaFeature
public final class GnarlyGenotyper extends VariantWalker {

private static final OneShotLogger warning = new OneShotLogger(GnarlyGenotyper.class);
Expand All @@ -90,13 +93,6 @@ public final class GnarlyGenotyper extends VariantWalker {

public static final int PIPELINE_MAX_ALT_COUNT = 6;

private static GnarlyGenotyperEngine genotyperEngine;

private final RMSMappingQuality mqCalculator = RMSMappingQuality.getInstance();

private final Set<Class<? extends InfoFieldAnnotation>> allASAnnotations = new HashSet<>();


@Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
doc="File to which variants should be written", optional=false)
private File outputFile;
Expand Down Expand Up @@ -141,7 +137,10 @@ public final class GnarlyGenotyper extends VariantWalker {
private final DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();

private VariantContextWriter vcfWriter;
private VariantContextWriter annotationDBwriter = null;
private VariantContextWriter annotationDatabaseWriter = null;
private GnarlyGenotyperEngine genotyperEngine;
private final RMSMappingQuality mqCalculator = RMSMappingQuality.getInstance();
private final Set<Class<? extends InfoFieldAnnotation>> allAlleleSpecificAnnotations = new HashSet<>();

/** these are used when {@link #onlyOutputCallsStartingInIntervals) is true */
private List<SimpleInterval> intervals;
Expand Down Expand Up @@ -194,10 +193,10 @@ public void onTraversalStart() {

Reflections reflections = new Reflections("org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific");
//not InfoFieldAnnotation.class because we don't want AS_InbreedingCoeff
allASAnnotations.addAll(reflections.getSubTypesOf(AS_StrandBiasTest.class));
allASAnnotations.addAll(reflections.getSubTypesOf(AS_RankSumTest.class));
allASAnnotations.add(AS_RMSMappingQuality.class);
allASAnnotations.add(AS_QualByDepth.class);
allAlleleSpecificAnnotations.addAll(reflections.getSubTypesOf(AS_StrandBiasTest.class));
allAlleleSpecificAnnotations.addAll(reflections.getSubTypesOf(AS_RankSumTest.class));
allAlleleSpecificAnnotations.add(AS_RMSMappingQuality.class);
allAlleleSpecificAnnotations.add(AS_QualByDepth.class);
}

private void setupVCFWriter(VCFHeader inputVCFHeader, SampleList samples) {
Expand Down Expand Up @@ -230,7 +229,7 @@ private void setupVCFWriter(VCFHeader inputVCFHeader, SampleList samples) {

vcfWriter = createVCFWriter(outputFile);
if (outputDbName != null) {
annotationDBwriter = createVCFWriter(new File(outputDbName));
annotationDatabaseWriter = createVCFWriter(new File(outputDbName));
}

final Set<String> sampleNameSet = samples.asSetOfSamples();
Expand All @@ -243,7 +242,7 @@ private void setupVCFWriter(VCFHeader inputVCFHeader, SampleList samples) {
final VCFHeader vcfHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet));
vcfWriter.writeHeader(vcfHeader);
if (outputDbName != null) {
annotationDBwriter.writeHeader(dbHeader);
annotationDatabaseWriter.writeHeader(dbHeader);
}
}

Expand Down Expand Up @@ -271,12 +270,12 @@ public void apply(VariantContext variant, ReadsContext reads, ReferenceContext r
}

final VariantContextBuilder annotationDBBuilder = new VariantContextBuilder(variant);
final VariantContext finalizedVC = GnarlyGenotyperEngine.finalizeGenotype(variant, annotationDBBuilder);
final VariantContext finalizedVC = genotyperEngine.finalizeGenotype(variant, annotationDBBuilder);
if (finalizedVC != null && (!onlyOutputCallsStartingInIntervals || intervals.stream().anyMatch(interval -> interval.contains(variantStart)))) {
vcfWriter.add(finalizedVC);
}
if (annotationDBwriter != null) {
annotationDBwriter.add(annotationDBBuilder.make()); //we don't seem to have a sites-only option anymore, so do it manually
if (annotationDatabaseWriter != null) {
annotationDatabaseWriter.add(annotationDBBuilder.make()); //we don't seem to have a sites-only option anymore, so do it manually
}
}

Expand All @@ -285,8 +284,8 @@ public void closeTool() {
if ( vcfWriter != null) {
vcfWriter.close();
}
if (annotationDBwriter != null) {
annotationDBwriter.close();
if (annotationDatabaseWriter != null) {
annotationDatabaseWriter.close();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,13 @@
import java.util.*;

public final class AnnotationUtils {
public static final String ALLELE_SPECIFIC_SPLIT_DELIM = "\\|"; //String.split takes a regex, so we need to escape the pipe
public static final String ALLELE_SPECIFIC_PRINT_DELIM = "|";
public static final String ALLELE_SPECIFIC_REDUCED_DELIM = ",";

private AnnotationUtils(){}

public static final String ALLELE_SPECIFIC_SPLIT_REGEX = "\\|"; //String.split takes a regex, so we need to escape the pipe
public static final String BRACKET_REGEX = "\\[|\\]";
public static final String LIST_DELIMITER = ",";

public static final String AS_SPLIT_REGEX = "\\|"; //String.split takes a regex, so we need to escape the pipe
public static final String PRINT_DELIM = "|";
private AnnotationUtils(){}

/**
* Helper function to parse the list into the annotation string
Expand Down Expand Up @@ -54,7 +51,7 @@ public static String encodeStringList( final List<String> stringList) {
* @return a comma-separated String
*/
public static String encodeAnyASList( final List<?> somethingList) {
return StringUtils.join(somethingList, ALLELE_SPECIFIC_PRINT_DELIM).replaceAll("\\[|\\]", ""); //Who actually wants brackets at the ends of their string? Who???
return StringUtils.join(somethingList, ALLELE_SPECIFIC_PRINT_DELIM).replaceAll(BRACKET_REGEX, ""); //Who actually wants brackets at the ends of their string? Who???
}

/**
Expand Down Expand Up @@ -90,6 +87,6 @@ public static List<String> getAlleleLengthListOfString(String rawDataString) {
if (rawDataString.startsWith("[")) {
rawDataString = rawDataString.substring(1, rawDataString.length() - 1).replaceAll("\\s", "");
}
return Arrays.asList(rawDataString.split(ALLELE_SPECIFIC_SPLIT_DELIM));
return Arrays.asList(rawDataString.split(ALLELE_SPECIFIC_SPLIT_REGEX));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public final class RMSMappingQuality extends InfoFieldAnnotation implements Stan
private static final int NUM_LIST_ENTRIES = 2;
private static final int SUM_OF_SQUARES_INDEX = 0;
private static final int TOTAL_DEPTH_INDEX = 1;
private static final String OUTPUT_PRECISION = "%.2f";

@Override
public String getRawKeyName() { return GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY;} //new key for the two-value MQ data to prevent version mismatch catastrophes
Expand Down Expand Up @@ -126,10 +127,10 @@ public Map<String, Object> finalizeRawData(final VariantContext vc, final Varian
else if (vc.hasAttribute(getDeprecatedRawKeyName())) {
rawMQdata = vc.getAttributeAsString(getDeprecatedRawKeyName(), null);
//the original version of ReblockGVCF produces a different MQ format -- try to handle that gracefully here just in case those files go through GenotypeGVCFs
if (vc.hasAttribute("MQ_DP")) {
logger.warn("Presence of MQ_DP key indicates that this tool may be running on an older output of ReblockGVCF " +
if (vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
logger.warn("Presence of " + GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED + " key indicates that this tool may be running on an older output of ReblockGVCF " +
"that may not have compatible annotations with this GATK version. Attempting to reformat MQ data.");
final String rawMQdepth = vc.getAttributeAsString("MQ_DP",null);
final String rawMQdepth = vc.getAttributeAsString(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED,null);
if (rawMQdepth == null) {
throw new UserException.BadInput("MQ annotation data is not properly formatted. This version expects a " +
"long tuple of sum of squared MQ values and total reads over variant genotypes.");
Expand Down Expand Up @@ -159,7 +160,7 @@ else if (vc.hasAttribute(getDeprecatedRawKeyName())) {
}

private String makeFinalizedAnnotationString(final long numOfReads, final long sumOfSquaredMQs) {
return String.format("%.2f", Math.sqrt(sumOfSquaredMQs/(double)numOfReads));
return String.format(OUTPUT_PRECISION, Math.sqrt(sumOfSquaredMQs/(double)numOfReads));
}


Expand Down Expand Up @@ -216,7 +217,7 @@ public Map<String, Object> annotate(final ReferenceContext ref,

@VisibleForTesting
static String formattedValue(double rms) {
return String.format("%.2f", rms);
return String.format(OUTPUT_PRECISION, rms);
}

/**
Expand All @@ -229,11 +230,11 @@ static String formattedValue(double rms) {
public VariantContext finalizeRawMQ(final VariantContext vc) {
final String rawMQdata = vc.getAttributeAsString(getRawKeyName(), null);
if (rawMQdata == null) {
if (!vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH)) {
if (!vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
return vc;
}
if (vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH)) {
final int numOfReads = vc.getAttributeAsInt(GATKVCFConstants.MAPPING_QUALITY_DEPTH, getNumOfReads(vc)); //MQ_DP is an undocumented hack for the Gnarly Pipeline -- improved version uses RAW_MQ_and_DP tuple format (see #4969)
if (vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
final int numOfReads = vc.getAttributeAsInt(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED, getNumOfReads(vc)); //MQ_DP is an undocumented hack for the Gnarly Pipeline -- improved version uses RAW_MQ_and_DP tuple format (see #4969)
final String deprecatedRawMQdata = vc.getAttributeAsString(getDeprecatedRawKeyName(), null);
final double squareSum = parseDeprecatedRawDataString(deprecatedRawMQdata);
final double rms = Math.sqrt(squareSum / (double)numOfReads);
Expand Down Expand Up @@ -265,7 +266,7 @@ private void parseRawDataString(ReducibleAnnotationData<List<Long>> myData) {
//TODO once the AS annotations have been added genotype gvcfs this can be removed for a more generic approach
private static List<Long> parseRawDataString(String rawDataString) {
try {
final String[] parsed = rawDataString.trim().replaceAll("\\[|\\]", "").split(", *");
final String[] parsed = rawDataString.trim().replaceAll(AnnotationUtils.BRACKET_REGEX, "").split(", *");
if (parsed.length != NUM_LIST_ENTRIES) {
throw new UserException.BadInput("Raw value for annotation has " + parsed.length + " values, expected " + NUM_LIST_ENTRIES);
}
Expand Down Expand Up @@ -300,8 +301,8 @@ private static double parseDeprecatedRawDataString(String rawDataString) {
*/
@VisibleForTesting
private static int getNumOfReads(final VariantContext vc) {
if(vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH)) {
int mqDP = vc.getAttributeAsInt(GATKVCFConstants.MAPPING_QUALITY_DEPTH, 0);
if(vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
int mqDP = vc.getAttributeAsInt(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED, 0);
if (mqDP > 0) {
return mqDP;
}
Expand Down Expand Up @@ -356,9 +357,9 @@ private static boolean hasReferenceDepth(Genotype gt) {
static long getNumOfReads(final VariantContext vc,
final ReadLikelihoods<Allele> likelihoods) {
if(vc.hasAttribute(GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY)) {
List<Long> MQtuple = getAttributeAsLongList(vc, GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY,0L);
if (MQtuple.get(TOTAL_DEPTH_INDEX) > 0) {
return MQtuple.get(TOTAL_DEPTH_INDEX);
List<Long> mqTuple = getAttributeAsLongList(vc, GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY,0L);
if (mqTuple.get(TOTAL_DEPTH_INDEX) > 0) {
return mqTuple.get(TOTAL_DEPTH_INDEX);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ public Map<String, Object> finalizeRawData(VariantContext vc, VariantContext ori
final List<Integer> standardDepth;
if (originalVC.hasAttribute(GATKVCFConstants.AS_VARIANT_DEPTH_KEY)) {
standardDepth = Arrays.stream(originalVC.getAttributeAsString(GATKVCFConstants.AS_VARIANT_DEPTH_KEY, "")
.split(AnnotationUtils.AS_SPLIT_REGEX)).mapToInt(Integer::parseInt).boxed().collect(Collectors.toList());
.split(AnnotationUtils.ALLELE_SPECIFIC_SPLIT_REGEX)).mapToInt(Integer::parseInt).boxed().collect(Collectors.toList());
} else {
standardDepth = getAlleleDepths(genotypes);
}
Expand Down Expand Up @@ -192,7 +192,7 @@ public static String finalizeRawGVCFVarDPValues(final String rawAnnotationListWi
if (rawAnnotationListWithNonRef == null) {
return null;
}
List<String> dpValues = Arrays.asList(rawAnnotationListWithNonRef.split(AnnotationUtils.AS_SPLIT_REGEX));
List<String> dpValues = Arrays.asList(rawAnnotationListWithNonRef.split(AnnotationUtils.ALLELE_SPECIFIC_SPLIT_REGEX));
if (dpValues.size() != expectedFinalAlleleCount + 1) { //we expect a nonRef allele
return null;
}
Expand All @@ -216,7 +216,7 @@ public static List<Integer> parseQualList(final VariantContext vc) {
}
else if (vc.hasAttribute(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY)) {
String asQuals = vc.getAttributeAsString(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY, "").replaceAll("\\[\\]\\s","");
String[] values = asQuals.split(AnnotationUtils.AS_SPLIT_REGEX);
String[] values = asQuals.split(AnnotationUtils.ALLELE_SPECIFIC_SPLIT_REGEX);
if (values.length != vc.getNAlleles()+1) { //plus one because the non-ref place holder is still around
throw new IllegalStateException("Number of AS_QUALapprox values doesn't match the number of alleles in the variant context.");
}
Expand Down
Loading

0 comments on commit ae1cd9b

Please sign in to comment.