diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java index e739a81cb10..d7860e1f324 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java @@ -18,7 +18,9 @@ import org.broadinstitute.hellbender.utils.IndexUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.codecs.FeaturesHeader; import org.broadinstitute.hellbender.utils.gcs.BucketUtils; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader; import org.broadinstitute.hellbender.utils.io.IOUtils; import org.genomicsdb.model.GenomicsDBExportConfiguration; import org.genomicsdb.reader.GenomicsDBFeatureReader; @@ -26,17 +28,16 @@ import java.io.File; import java.io.IOException; import java.lang.reflect.InvocationTargetException; -import java.net.URI; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.Iterator; import java.util.List; import java.util.Optional; import java.util.function.Function; import static org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils.createExportConfiguration; +import static org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.BCI_FILE_EXTENSION; /** * Enables traversals and queries over sources of Features, which are metadata associated with a location @@ -305,7 +306,8 @@ public FeatureDataSource(final FeatureInput featureInput, final int queryLook * @param setNameOnCodec If true, and if this FeatureDataSource uses a NameAwareCodec, the name of the FeatureInput will be used to set the codec's name. This exists as a mechanism to store the FeatureInput name in the source field of VariantContexts */ public FeatureDataSource(final FeatureInput featureInput, final int queryLookaheadBases, final Class targetFeatureType, - final int cloudPrefetchBuffer, final int cloudIndexPrefetchBuffer, final GenomicsDBOptions genomicsDBOptions, final boolean setNameOnCodec) { + final int cloudPrefetchBuffer, final int cloudIndexPrefetchBuffer, final GenomicsDBOptions genomicsDBOptions, + final boolean setNameOnCodec) { Utils.validateArg(queryLookaheadBases >= 0, "Query lookahead bases must be >= 0"); this.featureInput = Utils.nonNull(featureInput, "featureInput must not be null"); if (IOUtils.isGenomicsDBPath(featureInput)) { @@ -319,12 +321,14 @@ public FeatureDataSource(final FeatureInput featureInput, final int queryLook BucketUtils.getPrefetchingWrapper(cloudIndexPrefetchBuffer), genomicsDBOptions, setNameOnCodec); - if (IOUtils.isGenomicsDBPath(featureInput)) { + if (IOUtils.isGenomicsDBPath(featureInput) || + featureInput.getFeaturePath().toLowerCase().endsWith(BCI_FILE_EXTENSION)) { //genomics db uri's have no associated index file to read from, but they do support random access + // likewise with block-compressed interval files this.hasIndex = false; this.supportsRandomAccess = true; } else if (featureReader instanceof AbstractFeatureReader) { - this.hasIndex = ((AbstractFeatureReader) featureReader).hasIndex(); + this.hasIndex = ((AbstractFeatureReader)featureReader).hasIndex(); this.supportsRandomAccess = hasIndex; } else { throw new GATKException("Found a feature input that was neither GenomicsDB or a Tribble AbstractFeatureReader. 
Input was " + featureInput.toString() + "."); @@ -345,7 +349,7 @@ final void printCacheStats() { queryCache.printCacheStatistics( getName() ); } - @SuppressWarnings("unchecked") + @SuppressWarnings({"unchecked", "rawtypes"}) private static FeatureReader getFeatureReader(final FeatureInput featureInput, final Class targetFeatureType, final Function cloudWrapper, final Function cloudIndexWrapper, @@ -367,6 +371,9 @@ private static FeatureReader getFeatureReader(final Featu } } else { final FeatureCodec codec = getCodecForFeatureInput(featureInput, targetFeatureType, setNameOnCodec); + if ( featureInput.getFeaturePath().toLowerCase().endsWith(BCI_FILE_EXTENSION) ) { + return new Reader(featureInput, codec); + } return getTribbleFeatureReader(featureInput, codec, cloudWrapper, cloudIndexWrapper); } } @@ -380,7 +387,8 @@ private static FeatureReader getFeatureReader(final Featu */ @SuppressWarnings("unchecked") private static FeatureCodec getCodecForFeatureInput(final FeatureInput featureInput, - final Class targetFeatureType, final boolean setNameOnCodec) { + final Class targetFeatureType, + final boolean setNameOnCodec) { final FeatureCodec codec; final Class> codecClass = featureInput.getFeatureCodecClass(); if (codecClass == null) { @@ -391,6 +399,10 @@ private static FeatureReader getFeatureReader(final Featu } else { try { codec = codecClass.getDeclaredConstructor().newInstance(); + if ( !codec.canDecode(featureInput.toPath().toAbsolutePath().toUri().toString()) ) { + throw new GATKException(codec.getClass().getSimpleName() + " cannot decode " + + featureInput.toPath().toString()); + } } catch (final InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) { throw new GATKException("Unable to automatically instantiate codec " + codecClass.getName()); } @@ -461,7 +473,9 @@ protected static FeatureReader getGenomicsDBFeatureReader(final public SAMSequenceDictionary getSequenceDictionary() { SAMSequenceDictionary dict = null; final Object header = getHeader(); - if (header instanceof VCFHeader) { + if ( header instanceof FeaturesHeader ) { + dict = ((FeaturesHeader)header).getDictionary(); + } else if (header instanceof VCFHeader) { dict = ((VCFHeader) header).getSequenceDictionary(); } if (dict != null && !dict.isEmpty()) { diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java index eb55409bdf7..a24c0897e1a 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java @@ -246,6 +246,11 @@ void addToFeatureSources(final int featureQueryLookahead, final FeatureInput(featureInput, featureQueryLookahead, featureType, cloudPrefetchBuffer, cloudIndexPrefetchBuffer, genomicsDBOptions)); } + void addToFeatureSources (final FeatureInput featureInput, + final FeatureDataSource featureDataSource) { + featureSources.put(featureInput, featureDataSource); + } + /** * Given a ArgumentDefinition for an argument known to be of type FeatureInput (or a Collection thereof), retrieves the type * parameter for the FeatureInput (eg., for FeatureInput or List> @@ -375,6 +380,22 @@ public Iterator getFeatureIterator(final FeatureInput return dataSource.iterator(); } + /** + * As above, but takes an optional list of intervals to examine. 
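+ * Note that the intervals are applied only to the returned iterator: the data source's traversal intervals are reset afterwards.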
+ * @param featureDescriptor FeatureInput to scan + * @param intervals The userIntervals to examine (may be null) + * @param Feature type + * @return An iterator over the Features + */ + public Iterator getFeatureIterator( final FeatureInput featureDescriptor, + final List intervals ) { + final FeatureDataSource dataSource = lookupDataSource(featureDescriptor); + dataSource.setIntervalsForTraversal(intervals); + final Iterator itr = dataSource.iterator(); + dataSource.setIntervalsForTraversal(null); + return itr; + } + /** * Get the header associated with a particular FeatureInput * diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureWalker.java index ba4678c2b74..c9ee3fbd6d1 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureWalker.java @@ -1,9 +1,11 @@ package org.broadinstitute.hellbender.engine; +import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.tribble.Feature; import htsjdk.tribble.FeatureCodec; import org.broadinstitute.hellbender.engine.filters.CountingReadFilter; import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBOptions; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; @@ -57,12 +59,18 @@ private void initializeDrivingFeatures() { final GATKPath drivingPath = getDrivingFeaturePath(); final FeatureCodec codec = FeatureManager.getCodecForFile(drivingPath.toPath()); if (isAcceptableFeatureType(codec.getFeatureType())) { - drivingFeatures = new FeatureDataSource<>(new FeatureInput<>(drivingPath), FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES, null, cloudPrefetchBuffer, cloudIndexPrefetchBuffer, referenceArguments.getReferencePath()); - - final FeatureInput drivingFeaturesInput = new FeatureInput<>(drivingPath, "drivingFeatureFile"); - features.addToFeatureSources(0, drivingFeaturesInput, codec.getFeatureType(), cloudPrefetchBuffer, cloudIndexPrefetchBuffer, - referenceArguments.getReferencePath()); - header = getHeaderForFeatures(drivingFeaturesInput); + final GenomicsDBOptions options = new GenomicsDBOptions(referenceArguments.getReferencePath()); + final FeatureInput drivingFeatureInput = new FeatureInput<>(drivingPath); + drivingFeatureInput.setFeatureCodecClass((Class>)codec.getClass()); + drivingFeatures = new FeatureDataSource<>(drivingFeatureInput, FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES, null, + cloudPrefetchBuffer, cloudIndexPrefetchBuffer, options, false); + header = drivingFeatures.getHeader(); + + final FeatureInput featureInput = new FeatureInput<>(drivingPath, "drivingFeatureFile"); + featureInput.setFeatureCodecClass((Class>)codec.getClass()); + features.addToFeatureSources(featureInput, + new FeatureDataSource<>(featureInput, 0, codec.getFeatureType(), + cloudPrefetchBuffer, cloudIndexPrefetchBuffer, options, false)); } else { throw new UserException("File " + drivingPath.getRawInputString() + " contains features of the wrong type."); } diff --git a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index aa853f6014b..7ef9c3a7c34 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -660,7 +660,7 @@ public SAMSequenceDictionary 
getBestAvailableSequenceDictionary() { } else if (hasReads()){ return reads.getSequenceDictionary(); } else if (hasFeatures()){ - final List dictionaries = features.getVariantSequenceDictionaries(); + final List dictionaries = features.getAllSequenceDictionaries(); //If there is just one, it clearly is the best. Otherwise, none is best. if (dictionaries.size() == 1){ return dictionaries.get(0); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/StructuralVariantDiscoverer.java b/src/main/java/org/broadinstitute/hellbender/tools/StructuralVariantDiscoverer.java index 289568bb585..39d28daccf2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/StructuralVariantDiscoverer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/StructuralVariantDiscoverer.java @@ -36,7 +36,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SimpleNovelAdjacencyAndChimericAlignmentEvidence; import org.broadinstitute.hellbender.tools.spark.sv.utils.CNVInputReader; import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVVCFWriter; import org.broadinstitute.hellbender.utils.BaseUtils; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java index 894cc5abfa4..1ac964daeac 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java @@ -32,6 +32,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.evidence.FindBreakpointEvidenceSpark; import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata; import org.broadinstitute.hellbender.tools.spark.sv.utils.*; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.SequenceDictionaryUtils; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducer.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducer.java index 7686963ad25..b9b0eebddc7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducer.java @@ -11,6 +11,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.evidence.EvidenceTargetLink; import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata; import org.broadinstitute.hellbender.tools.spark.sv.utils.*; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.utils.Utils; import scala.Tuple2; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java index d95e93a8b32..7fd0b07efe1 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java @@ -22,7 +22,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignedContig; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigAlignmentsRDDProcessor; import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.ContigChimericAlignmentIterativeInterpreter; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVVCFWriter; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVType.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVType.java index af9cca4c7e9..67b6c465eae 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVType.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVType.java @@ -10,7 +10,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.evidence.EvidenceTargetLink; import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata; import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.utils.SimpleInterval; import java.util.Collections; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java index ff382e8ae3f..db3d80ba603 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java @@ -28,7 +28,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.CpxVariantInterpreter; import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SegmentedCpxVariantSimpleVariantExtractor; import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SimpleNovelAdjacencyInterpreter; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVVCFWriter; import org.broadinstitute.hellbender.utils.io.IOUtils; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryInputMetaData.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryInputMetaData.java index 0c5a35952b8..deeddff4c63 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryInputMetaData.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryInputMetaData.java @@ -11,8 +11,8 @@ import org.broadinstitute.hellbender.tools.spark.sv.evidence.EvidenceTargetLink; import 
org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata; import org.broadinstitute.hellbender.tools.spark.sv.utils.PairedStrandedIntervalTree; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils; import java.util.List; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryUtils.java index ac03ec8eec8..88e795a824f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryUtils.java @@ -5,8 +5,8 @@ import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigAlignmentsArgumentCollection; import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.BreakpointComplications; import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.NovelAdjacencyAndAltHaplotype; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVVCFReader; import org.broadinstitute.hellbender.utils.SimpleInterval; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentInterval.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentInterval.java index 8af04fb95c2..5e2dc9d012e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentInterval.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentInterval.java @@ -7,7 +7,7 @@ import com.google.common.annotations.VisibleForTesting; import htsjdk.samtools.*; import htsjdk.samtools.util.SequenceUtil; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.Strand; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Tail; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ContigChimericAlignmentIterativeInterpreter.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ContigChimericAlignmentIterativeInterpreter.java index f8f510c2d28..005ae05b124 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ContigChimericAlignmentIterativeInterpreter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ContigChimericAlignmentIterativeInterpreter.java @@ -18,8 +18,8 @@ import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignmentInterval; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.StrandSwitch; -import 
org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import scala.Tuple2; import java.util.ArrayList; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractor.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractor.java index 37ab74c77d4..d1ccd10c98f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractor.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractor.java @@ -15,7 +15,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryInputMetaData; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignedContig; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.read.GATKRead; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyAndChimericAlignmentEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyAndChimericAlignmentEvidence.java index f3456b3a0c8..1208d1ada59 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyAndChimericAlignmentEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyAndChimericAlignmentEvidence.java @@ -19,8 +19,8 @@ import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignmentInterval; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments; import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.Utils; import java.io.Serializable; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyInterpreter.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyInterpreter.java index 5a8cb9cc71b..1211f10de2f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyInterpreter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyInterpreter.java @@ -13,8 +13,8 @@ import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvType; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignmentInterval; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments; -import 
org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import scala.Tuple2; import java.util.List; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/AlignedAssemblyOrExcuse.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/AlignedAssemblyOrExcuse.java index 39a2ec29ddb..f5ffffc058c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/AlignedAssemblyOrExcuse.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/AlignedAssemblyOrExcuse.java @@ -7,7 +7,7 @@ import htsjdk.samtools.*; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVFileUtils; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils; import org.broadinstitute.hellbender.utils.SequenceDictionaryUtils; import org.broadinstitute.hellbender.utils.Utils; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointDensityFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointDensityFilter.java index a61aaaa6a72..acd4f6ec3d6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointDensityFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointDensityFilter.java @@ -2,6 +2,8 @@ import com.google.common.annotations.VisibleForTesting; import org.broadinstitute.hellbender.tools.spark.sv.utils.*; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.Utils; import scala.Tuple2; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidence.java index f11a5f9e3ae..e7eabb74b21 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidence.java @@ -8,7 +8,7 @@ import htsjdk.samtools.Cigar; import htsjdk.samtools.TextCigarCodec; import org.broadinstitute.hellbender.exceptions.GATKException; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.StrandedInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.TextMDCodec; import org.broadinstitute.hellbender.utils.read.GATKRead; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidenceClusterer.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidenceClusterer.java index d15c790cf3f..4f8137ef033 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidenceClusterer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidenceClusterer.java @@ -1,7 +1,7 @@ package org.broadinstitute.hellbender.tools.spark.sv.evidence; import org.apache.commons.collections4.iterators.SingletonIterator; -import 
org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import java.util.ArrayList; import java.util.Collections; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceOverlapChecker.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceOverlapChecker.java index 7ce7ef2da37..c2ada3a1083 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceOverlapChecker.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceOverlapChecker.java @@ -1,8 +1,8 @@ package org.broadinstitute.hellbender.tools.spark.sv.evidence; import org.apache.commons.lang3.tuple.ImmutablePair; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.tools.spark.sv.utils.StrandedInterval; import org.broadinstitute.hellbender.utils.Utils; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLink.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLink.java index 9d6127d1576..84a59af9876 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLink.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLink.java @@ -5,7 +5,7 @@ import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; import org.broadinstitute.hellbender.tools.spark.sv.utils.PairedStrandedIntervals; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.StrandedInterval; import org.broadinstitute.hellbender.utils.Utils; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLinkClusterer.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLinkClusterer.java index fb05463077d..9befbbcce28 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLinkClusterer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLinkClusterer.java @@ -2,7 +2,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.utils.PairedStrandedIntervalTree; import org.broadinstitute.hellbender.tools.spark.sv.utils.PairedStrandedIntervals; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.StrandedInterval; import org.broadinstitute.hellbender.utils.Utils; import scala.Tuple2; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/ExtractSVEvidenceSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/ExtractSVEvidenceSpark.java index 7610d317164..279b3978473 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/ExtractSVEvidenceSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/ExtractSVEvidenceSpark.java @@ -12,8 +12,8 @@ import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup; import 
org.broadinstitute.hellbender.engine.spark.GATKSparkTool; import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.tools.spark.utils.FlatMapGluer; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBreakpointEvidenceSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBreakpointEvidenceSpark.java index ff1dc17d3a6..45cd1600623 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBreakpointEvidenceSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBreakpointEvidenceSpark.java @@ -28,6 +28,8 @@ import org.broadinstitute.hellbender.tools.spark.sv.utils.*; import org.broadinstitute.hellbender.tools.spark.utils.FlatMapGluer; import org.broadinstitute.hellbender.tools.spark.utils.HopscotchUniqueMultiMapSpark; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.bwa.BwaMemIndexCache; import org.broadinstitute.hellbender.utils.gcs.BucketUtils; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/IntervalCoverageFinder.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/IntervalCoverageFinder.java index 5c745976adb..5f927335b00 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/IntervalCoverageFinder.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/IntervalCoverageFinder.java @@ -5,7 +5,7 @@ import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; import org.broadinstitute.hellbender.exceptions.GATKException; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.utils.read.GATKRead; import scala.Tuple2; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/KSWindowFinder.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/KSWindowFinder.java index 26a0847ee38..563b4244f70 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/KSWindowFinder.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/KSWindowFinder.java @@ -1,7 +1,7 @@ package org.broadinstitute.hellbender.tools.spark.sv.evidence; import com.google.common.annotations.VisibleForTesting; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils; import org.broadinstitute.hellbender.tools.spark.utils.IntHistogram; import org.broadinstitute.hellbender.utils.read.GATKRead; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/PartitionCrossingChecker.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/PartitionCrossingChecker.java index a217f9e9907..c90393bada3 
100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/PartitionCrossingChecker.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/PartitionCrossingChecker.java @@ -1,6 +1,6 @@ package org.broadinstitute.hellbender.tools.spark.sv.evidence; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; /** It allows you to ask whether a given interval is near the beginning or end of the partition. * This is useful in handling breakpoints for which the evidence is split across two partitions. */ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/QNameFinder.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/QNameFinder.java index fec336b5b99..904298fd2fa 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/QNameFinder.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/QNameFinder.java @@ -1,8 +1,8 @@ package org.broadinstitute.hellbender.tools.spark.sv.evidence; import org.apache.commons.collections4.iterators.SingletonIterator; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.read.GATKRead; import java.util.Collections; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/ReadClassifier.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/ReadClassifier.java index be54f3a5242..c60d41041cf 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/ReadClassifier.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/ReadClassifier.java @@ -3,8 +3,8 @@ import com.google.common.annotations.VisibleForTesting; import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarOperator; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.read.GATKRead; import java.util.*; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/SVReadFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/SVReadFilter.java index 5f2d4b84feb..c60d924656f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/SVReadFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/SVReadFilter.java @@ -1,7 +1,7 @@ package org.broadinstitute.hellbender.tools.spark.sv.evidence; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection.FindBreakpointEvidenceSparkArgumentCollection; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils; import org.broadinstitute.hellbender.utils.read.CigarUtils; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/XGBoostEvidenceFilter.java 
b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/XGBoostEvidenceFilter.java index 36d298303cf..8df91ce4142 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/XGBoostEvidenceFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/XGBoostEvidenceFilter.java @@ -12,8 +12,9 @@ import org.broadinstitute.hellbender.engine.FeatureDataSource; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection; -import org.broadinstitute.hellbender.tools.spark.sv.utils.*; import org.broadinstitute.hellbender.tools.spark.utils.IntHistogram; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.io.IOUtils; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/CNVInputReader.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/CNVInputReader.java index 1b316ec2e15..6807f10ee4e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/CNVInputReader.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/CNVInputReader.java @@ -5,6 +5,8 @@ import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.vcf.VCFHeader; import org.broadinstitute.hellbender.engine.FeatureDataSource; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.Utils; import scala.Tuple2; @@ -17,7 +19,7 @@ public class CNVInputReader { * are based on the sequence indices in the SAM header, _NOT_ the ReadMetadata (which we might not have access to at this * time). 
*/ - public static SVIntervalTree loadCNVCalls(final String cnvCallsFile, + public static SVIntervalTree loadCNVCalls( final String cnvCallsFile, final SAMFileHeader headerForReads) { Utils.validate(cnvCallsFile != null, "Can't load null CNV calls file"); try ( final FeatureDataSource dataSource = new FeatureDataSource<>(cnvCallsFile, null, 0, null) ) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/PairedStrandedIntervalTree.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/PairedStrandedIntervalTree.java index a48749373eb..64df6f34479 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/PairedStrandedIntervalTree.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/PairedStrandedIntervalTree.java @@ -4,6 +4,7 @@ import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.Utils; import scala.Tuple2; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVContext.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVContext.java index d772a55f39e..dd7d9636a5d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVContext.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVContext.java @@ -7,6 +7,7 @@ import org.broadinstitute.hellbender.engine.spark.datasources.ReferenceMultiSparkSource; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVFileUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVFileUtils.java index 4d38c94b68c..5380d965624 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVFileUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVFileUtils.java @@ -4,6 +4,7 @@ import htsjdk.samtools.util.FileExtensions; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.spark.utils.HopscotchSetSpark; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.io.IOUtils; @@ -88,8 +89,8 @@ public static void writeKmersFile(final String kmersFileP } /** Read intervals from file. 
*/ - public static List readIntervalsFile(final String intervalsFilePath, - final Map contigNameMap ) { + public static List readIntervalsFile( final String intervalsFilePath, + final Map contigNameMap ) { Utils.nonNull(intervalsFilePath, "provided intervals file path is null"); Utils.nonNull(contigNameMap, "provided map for contig index lookup is null"); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFReader.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFReader.java index ba78e09e140..0adb72b4e44 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFReader.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFReader.java @@ -5,11 +5,13 @@ import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.hellbender.engine.FeatureDataSource; import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; public class SVVCFReader { - public static SVIntervalTree readBreakpointsFromTruthVCF(final String truthVCF, - final SAMSequenceDictionary dictionary, - final int padding ) { + public static SVIntervalTree readBreakpointsFromTruthVCF( final String truthVCF, + final SAMSequenceDictionary dictionary, + final int padding ) { SVIntervalTree breakpoints = new SVIntervalTree<>(); try ( final FeatureDataSource dataSource = new FeatureDataSource<>(truthVCF, null, 0, VariantContext.class) ) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/StrandedInterval.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/StrandedInterval.java index c2f1ee4f998..03c3b8bd90c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/StrandedInterval.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/StrandedInterval.java @@ -4,6 +4,7 @@ import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.utils.Utils; /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java index 99fbffb374d..5bdfb27d8de 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java @@ -6,12 +6,13 @@ import java.util.Objects; public final class BafEvidence implements Feature { - final String sample; final String contig; final int position; final double value; + public final static String BCI_VERSION = "1.0"; + public BafEvidence(final String sample, final String contig, final int position, final double value) { Utils.nonNull(sample); Utils.nonNull(contig); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/DepthEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/DepthEvidence.java index 55310995cd2..3e9c046273c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/DepthEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/DepthEvidence.java @@ -13,6 +13,8 @@ public final class DepthEvidence implements Feature { final int end; final int[] counts; + public static final String BCI_VERSION = "1.0"; + public DepthEvidence(final String contig, int start, final int end, final int[] counts) { 
Utils.nonNull(contig); Utils.nonNull(counts); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/DiscordantPairEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/DiscordantPairEvidence.java index f0079c82f6d..ae9e40671df 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/DiscordantPairEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/DiscordantPairEvidence.java @@ -16,6 +16,8 @@ public final class DiscordantPairEvidence implements Feature { final boolean startStrand; final boolean endStrand; + public final static String BCI_VERSION = "1.0"; + public DiscordantPairEvidence(final String sample, final String startContig, final int start, final boolean startStrand, final String endContig, final int end, final boolean endStrand) { Utils.nonNull(sample); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java new file mode 100644 index 00000000000..5cdf8c3ddf4 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java @@ -0,0 +1,97 @@ +package org.broadinstitute.hellbender.tools.sv; + +import com.google.common.annotations.VisibleForTesting; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.Locatable; +import htsjdk.tribble.Feature; +import org.broadinstitute.hellbender.utils.Nucleotide; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + +@VisibleForTesting +public final class LocusDepth implements Feature { + private final String contig; + private final int position; + private final byte refCall; // index into nucleotideValues + private int[] depths; + public final static String BCI_VERSION = "1.0"; + + // our own private copy so that we don't make repeated array allocations + private final static Nucleotide[] nucleotideValues = Nucleotide.values(); + + public LocusDepth( final Locatable loc, final byte refCall ) { + this.contig = loc.getContig(); + this.position = loc.getStart(); + this.refCall = refCall; + this.depths = new int[4]; + } + + public LocusDepth( final String contig, final int position, final byte refCall, + final int aDepth, final int cDepth, final int gDepth, final int tDepth ) { + this.contig = contig; + this.position = position; + this.refCall = refCall; + this.depths = new int[4]; + depths[0] = aDepth; + depths[1] = cDepth; + depths[2] = gDepth; + depths[3] = tDepth; + } + + public LocusDepth( final DataInputStream dis, final SAMSequenceDictionary dict ) throws IOException { + contig = dict.getSequence(dis.readInt()).getSequenceName(); + position = dis.readInt(); + refCall = dis.readByte(); + depths = new int[4]; + depths[0] = dis.readInt(); + depths[1] = dis.readInt(); + depths[2] = dis.readInt(); + depths[3] = dis.readInt(); + } + + public void observe( final int idx ) { + depths[idx] += 1; + } + + @Override + public String getContig() { + return contig; + } + + @Override + public int getEnd() { + return position; + } + + @Override + public int getStart() { + return position; + } + + public char getRefCall() { + return (char)refCall; + } + + public int getADepth() { return depths[0]; } + public int getCDepth() { return depths[1]; } + public int getGDepth() { return depths[2]; } + public int getTDepth() { return depths[3]; } + + public void write( final DataOutputStream dos, + final SAMSequenceDictionary dict ) throws IOException { + dos.writeInt(dict.getSequenceIndex(contig)); + dos.writeInt(position); + 
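// followed by the reference call and the A, C, G, T depth counts +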
dos.writeByte(refCall); + dos.writeInt(depths[0]); + dos.writeInt(depths[1]); + dos.writeInt(depths[2]); + dos.writeInt(depths[3]); + } + + public String toString() { + return contig + "\t" + position + "\t" + (char)refCall + "\t" + + depths[0] + "\t" + depths[1] + "\t" + depths[2] + "\t" + depths[3]; + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidence.java index b80f7401252..b1e1920db14 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidence.java @@ -1,7 +1,9 @@ package org.broadinstitute.hellbender.tools.sv; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.Feature; -import htsjdk.tribble.FeatureCodec; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.argparser.ExperimentalFeature; @@ -10,15 +12,16 @@ import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup; import org.broadinstitute.hellbender.engine.*; import org.broadinstitute.hellbender.exceptions.UserException; -import org.broadinstitute.hellbender.utils.codecs.BafEvidenceCodec; -import org.broadinstitute.hellbender.utils.codecs.DepthEvidenceCodec; -import org.broadinstitute.hellbender.utils.codecs.DiscordantPairEvidenceCodec; -import org.broadinstitute.hellbender.utils.codecs.SplitReadEvidenceCodec; -import org.broadinstitute.hellbender.utils.io.FeatureOutputStream; +import org.broadinstitute.hellbender.utils.codecs.*; + +import java.io.*; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.GZIPOutputStream; /** - * Prints SV evidence records. Can be used with -L to retrieve records on a set of intervals. Supports streaming input - * from GCS buckets. + * Prints SV evidence records. Can be used with -L to retrieve records on a set of intervals. + * Supports streaming input from GCS buckets. * *

 * <h3>Inputs</h3>
 *
@@ -27,7 +30,10 @@
 *         Coordinate-sorted and indexed evidence file URI
 *     </li>
 *     <li>
- *         Reference sequence dictionary
+ *         Sequence dictionary (or reference) if the input file is tab-delimited text
+ *     </li>
+ *     <li>
+ *         Sample name(s) if the input file is tab-delimited text other than read depth
 *     </li>
 * </ul>
 *
@@ -35,7 +41,8 @@
 *
 * <ul>
 *     <li>
- *         Coordinate-sorted evidence file, automatically indexed if ending with ".gz"
+ *         Coordinate-sorted evidence file with a name that matches the input file evidence type,
+ *         automatically indexed if ending with ".gz" or ".bci"
 *     </li>
 * </ul>
    * @@ -59,7 +66,7 @@ ) @ExperimentalFeature @DocumentedFeature -public final class PrintSVEvidence extends FeatureWalker { +public final class PrintSVEvidence extends FeatureWalker { public static final String EVIDENCE_FILE_NAME = "evidence-file"; public static final String COMPRESSION_LEVEL_NAME = "compression-level"; @@ -68,15 +75,17 @@ public final class PrintSVEvidence extends FeatureWalker { doc = "Input file URI with extension '" + SplitReadEvidenceCodec.FORMAT_SUFFIX + "', '" + DiscordantPairEvidenceCodec.FORMAT_SUFFIX + "', '" + + LocusDepthCodec.FORMAT_SUFFIX + "', '" + BafEvidenceCodec.FORMAT_SUFFIX + "', or '" - + DepthEvidenceCodec.FORMAT_SUFFIX + "' (may be gzipped).", + + DepthEvidenceCodec.FORMAT_SUFFIX + "' (may be gzipped). " + + "Can also handle bci rather than txt files.", fullName = EVIDENCE_FILE_NAME ) private GATKPath inputFilePath; @Argument( doc = "Output file with an evidence extension matching the input. Will be indexed if it has a " + - "block-compressed extension (e.g. '.gz').", + "block-compressed extension (e.g. '.gz' or '.bci').", fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) @@ -89,17 +98,43 @@ public final class PrintSVEvidence extends FeatureWalker { ) private int compressionLevel = 4; - private FeatureOutputStream peStream; - private FeatureOutputStream srStream; - private FeatureOutputStream bafStream; - private FeatureOutputStream rdStream; - private FeatureCodec featureCodec; - private Class evidenceClass; + @Argument(doc = "List of sample names", fullName = "sample-names", optional = true) + private List sampleNames = new ArrayList<>(); + + @Argument(doc = "Output file for sample names", fullName = "sample-list-dump", optional = true) + private GATKPath sampleListOutputFile; + + @Argument(doc = "Output file for contig dictionary", fullName = "sequence-dict-dump", optional = true) + private GATKPath dictionaryOutputFile; + + private FeatureSink outputSink; + private Class evidenceClass; + + private static final List>> outputCodecs = + new ArrayList<>(10); + static { + outputCodecs.add(new BafEvidenceCodec()); + outputCodecs.add(new DepthEvidenceCodec()); + outputCodecs.add(new DiscordantPairEvidenceCodec()); + outputCodecs.add(new LocusDepthCodec()); + outputCodecs.add(new SplitReadEvidenceCodec()); + outputCodecs.add(new BafEvidenceBCICodec()); + outputCodecs.add(new DepthEvidenceBCICodec()); + outputCodecs.add(new DiscordantPairEvidenceBCICodec()); + outputCodecs.add(new LocusDepthBCICodec()); + outputCodecs.add(new SplitReadEvidenceBCICodec()); + } @Override - protected boolean isAcceptableFeatureType(final Class featureType) { - return featureType.equals(BafEvidence.class) || featureType.equals(DepthEvidence.class) - || featureType.equals(DiscordantPairEvidence.class) || featureType.equals(SplitReadEvidence.class); + @SuppressWarnings("unchecked") + protected boolean isAcceptableFeatureType( final Class featureType ) { + for ( final FeatureOutputCodec codec : outputCodecs ) { + if ( featureType.equals(codec.getFeatureType()) ) { + evidenceClass = (Class)featureType; + return true; + } + } + return false; } @Override @@ -110,77 +145,101 @@ public GATKPath getDrivingFeaturePath() { @Override public void onTraversalStart() { super.onTraversalStart(); - featureCodec = FeatureManager.getCodecForFile(inputFilePath.toPath()); - evidenceClass = featureCodec.getFeatureType(); - initializeOutput(); - writeHeader(); + final FeaturesHeader header = getHeader(); + if ( 
sampleListOutputFile != null ) { + dumpSamples(sampleListOutputFile, header.getSampleNames()); + } + if ( dictionaryOutputFile != null ) { + dumpDictionary(dictionaryOutputFile, header.getDictionary()); + } + initializeOutput(header); } - private void initializeOutput() { - if (evidenceClass.equals(DiscordantPairEvidence.class)) { - peStream = new FeatureOutputStream<>(outputFilePath, featureCodec, DiscordantPairEvidenceCodec::encode, - getBestAvailableSequenceDictionary(), compressionLevel); - } else if (evidenceClass.equals(SplitReadEvidence.class)) { - srStream = new FeatureOutputStream<>(outputFilePath, featureCodec, SplitReadEvidenceCodec::encode, - getBestAvailableSequenceDictionary(), compressionLevel); - } else if (evidenceClass.equals(BafEvidence.class)) { - bafStream = new FeatureOutputStream<>(outputFilePath, featureCodec, BafEvidenceCodec::encode, - getBestAvailableSequenceDictionary(), compressionLevel); - } else if (evidenceClass.equals(DepthEvidence.class)) { - rdStream = new FeatureOutputStream<>(outputFilePath, featureCodec, DepthEvidenceCodec::encode, - getBestAvailableSequenceDictionary(), compressionLevel); + private FeaturesHeader getHeader() { + final SAMSequenceDictionary dict; + final List samples; + final Object headerObj = getDrivingFeaturesHeader(); + if ( headerObj instanceof FeaturesHeader ) { + final FeaturesHeader header = (FeaturesHeader)headerObj; + dict = header.getDictionary() == null ? + getBestAvailableSequenceDictionary() : + header.getDictionary(); + samples = header.getSampleNames() == null ? sampleNames : header.getSampleNames(); } else { - throw new UserException.BadInput("Unsupported evidence type: " + evidenceClass.getSimpleName()); + dict = getBestAvailableSequenceDictionary(); + samples = sampleNames; + } + return new FeaturesHeader(evidenceClass.getSimpleName(), "?", dict, samples); + } + + private static void dumpSamples( final GATKPath outputPath, final List sampleNames ) { + try ( final BufferedWriter writer = writerForPath(outputPath) ) { + for ( final String sampleName : sampleNames ) { + writer.write(sampleName); + writer.newLine(); + } + } catch ( final IOException ioe ) { + throw new UserException("can't open sample-list-dump file: " + outputPath, ioe); } } - private void writeHeader() { - final Object header = getDrivingFeaturesHeader(); - if (header != null) { - if (header instanceof String) { - if (peStream != null) { - peStream.writeHeader((String) header); - } else if (srStream != null) { - srStream.writeHeader((String) header); - } else if (bafStream != null) { - bafStream.writeHeader((String) header); - } else { - rdStream.writeHeader((String) header); - } - } else { - throw new IllegalArgumentException("Expected header object of type " + String.class.getSimpleName()); + private static void dumpDictionary( final GATKPath outputPath, final SAMSequenceDictionary dict ) { + try ( final BufferedWriter writer = writerForPath(outputPath) ) { + for ( final SAMSequenceRecord record : dict.getSequences() ) { + writer.write(record.getSAMString()); + writer.newLine(); } + } catch ( final IOException ioe ) { + throw new UserException("can't open sequence-dict-dump file: " + outputPath, ioe); + } + } + + private static BufferedWriter writerForPath( final GATKPath outputPath ) throws IOException { + OutputStream stream = outputPath.getOutputStream(); + if ( IOUtil.hasBlockCompressedExtension(outputPath.toPath()) ) { + stream = new GZIPOutputStream(stream); } + return new BufferedWriter(new OutputStreamWriter(stream)); + } + + 
@SuppressWarnings("unchecked") + private void initializeOutput( final FeaturesHeader header ) { + final FeatureOutputCodec outputCodec = findOutputCodec(outputFilePath); + final Class outputClass = outputCodec.getFeatureType(); + if ( !evidenceClass.equals(outputClass) ) { + throw new UserException("The input file contains " + evidenceClass.getSimpleName() + + " features, but the output file would be expected to contain " + + outputClass.getSimpleName() + " features. Please choose an output file name " + + "appropriate for the evidence type."); + } + outputSink = (FeatureSink)outputCodec.makeSink(outputFilePath, + header.getDictionary(), + header.getSampleNames(), + compressionLevel); + } + + private static FeatureOutputCodec findOutputCodec( final GATKPath outputFilePath ) { + final String outputFileName = outputFilePath.toString(); + for ( final FeatureOutputCodec codec : outputCodecs ) { + if ( codec.canDecode(outputFileName) ) { + return codec; + } + } + throw new UserException("no codec found for path " + outputFileName); } @Override - public void apply(final Feature feature, + public void apply(final F feature, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) { - if (peStream != null) { - peStream.add((DiscordantPairEvidence) feature); - } else if (srStream != null) { - srStream.add((SplitReadEvidence) feature); - } else if (bafStream != null) { - bafStream.add((BafEvidence) feature); - } else { - rdStream.add((DepthEvidence) feature); - } + outputSink.write(feature); } @Override public Object onTraversalSuccess() { super.onTraversalSuccess(); - if (peStream != null) { - peStream.close(); - } else if (srStream != null) { - srStream.close(); - } else if (bafStream != null) { - bafStream.close(); - } else { - rdStream.close(); - } + outputSink.close(); return null; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/SplitReadEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/SplitReadEvidence.java index 9f37f5ea6a6..124cca34ed5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/SplitReadEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/SplitReadEvidence.java @@ -13,6 +13,8 @@ public final class SplitReadEvidence implements Feature { final int count; final boolean strand; + public final static String BCI_VERSION = "1.0"; + public SplitReadEvidence(final String sample, final String contig, final int position, final int count, final boolean strand) { Utils.nonNull(sample); Utils.nonNull(contig); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java new file mode 100644 index 00000000000..c6f384d6e9d --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java @@ -0,0 +1,708 @@ +package org.broadinstitute.hellbender.tools.walkers.sv; + +import com.google.common.annotations.VisibleForTesting; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.Locatable; +import htsjdk.variant.variantcontext.VariantContext; +import org.apache.logging.log4j.LogManager; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.BetaFeature; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import 
org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup; +import org.broadinstitute.hellbender.engine.*; +import org.broadinstitute.hellbender.engine.filters.ReadFilter; +import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.sv.DiscordantPairEvidence; +import org.broadinstitute.hellbender.tools.sv.LocusDepth; +import org.broadinstitute.hellbender.tools.sv.SplitReadEvidence; +import org.broadinstitute.hellbender.utils.Nucleotide; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.codecs.*; +import org.broadinstitute.hellbender.utils.read.GATKRead; + +import java.util.*; +import java.util.function.Predicate; + +import static org.broadinstitute.hellbender.utils.read.ReadUtils.isBaseInsideAdaptor; + +/** + * Creates discordant read pair and split read evidence files for use in the GATK-SV pipeline. + * This tool emulates the functionality of the "svtk collect-pesr" used in v1 of the GATK-SV pipeline. + * + * The first output file, which should be named "*.pe.txt" or "*.pe.txt.gz" is a tab-delimited file + * containing information on discordant read pairs in the input cram, with the following columns: + * + *
+ * <ul>
+ *     <li>read contig</li>
+ *     <li>read start</li>
+ *     <li>read strand</li>
+ *     <li>mate contig</li>
+ *     <li>mate start</li>
+ *     <li>mate strand</li>
+ *     <li>sample name</li>
+ * </ul>
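+ *
+ * A hypothetical example record, shown space-separated here (the actual file is tab-delimited,
+ * and the coordinates, the "+"/"-" strand rendering, and the sample name are illustrative
+ * assumptions, not output from a real run):
+ * <pre>
+ *     chr1  15024  +  chr7  90233  -  sample1
+ * </pre>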
+ *
+ * Only one record is emitted for each discordant read pair, at the read in the pair with the "upstream" start
+ * position according to the sequence dictionary contig ordering and coordinate.
+ *
+ * The second output file, which should be named "*.sr.txt" or "*.sr.txt.gz", contains the locations
+ * of all split read clippings in the input bam or cram, with the following columns:
+ *
+ * <ul>
+ *     <li>contig</li>
+ *     <li>clipping position</li>
+ *     <li>direction: side of the read that was clipped (either "left" or "right")</li>
+ *     <li>count: the number of reads clipped at this location in this direction</li>
+ *     <li>sample name</li>
+ * </ul>
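+ *
+ * A hypothetical example record, shown space-separated here (the actual file is tab-delimited,
+ * and all values are illustrative assumptions):
+ * <pre>
+ *     chr1  15024  right  3  sample1
+ * </pre>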
+ *
+ * The third output file, which should be named "*.ac.txt" or "*.ac.txt.gz", specifies allele counts:
+ * for each locus specified in an input VCF as a simple SNP, it gives a count, for each base, of the
+ * number of reads that cover that locus. It has the following columns:
+ *
+ * <ul>
+ *     <li>contig</li>
+ *     <li>position</li>
+ *     <li>ref allele</li>
+ *     <li>A observations</li>
+ *     <li>C observations</li>
+ *     <li>G observations</li>
+ *     <li>T observations</li>
+ * </ul>
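+ *
+ * A hypothetical example record, shown space-separated here (the actual file is tab-delimited,
+ * and all values are illustrative assumptions): a locus whose reference allele A was observed
+ * in 17 reads, with one read carrying a G:
+ * <pre>
+ *     chr1  15024  A  17  0  1  0
+ * </pre>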
    + * + * Each of these output files may also be written as a block-compressed interval file, rather than + * as a tab-delimited text file by specifying an output file name that ends with ".bci" rather than + * ".txt". These files are self-indexing, and contain complete header information including sample + * name(s) and a dictionary for the contigs. + */ +@BetaFeature +@CommandLineProgramProperties( + summary = "Gathers paired-end and split read evidence files for use in the GATK-SV pipeline. Output files " + + "are a file containing the location of and orientation of read pairs marked as discordant, and a " + + "file containing the clipping location of all soft clipped reads and the orientation of the clipping.", + oneLineSummary = "Gathers paired-end and split read evidence files for use in the GATK-SV pipeline.", + programGroup = StructuralVariantDiscoveryProgramGroup.class +) +public class CollectSVEvidence extends ReadWalker { + + public static final String PAIRED_END_FILE_ARGUMENT_SHORT_NAME = "PE"; + public static final String PAIRED_END_FILE_ARGUMENT_LONG_NAME = "pe-file"; + public static final String SPLIT_READ_FILE_ARGUMENT_SHORT_NAME = "SR"; + public static final String SPLIT_READ_FILE_ARGUMENT_LONG_NAME = "sr-file"; + public static final String ALLELE_COUNT_OUTPUT_ARGUMENT_SHORT_NAME = "AC"; + public static final String ALLELE_COUNT_OUTPUT_ARGUMENT_LONG_NAME = "allele-count-file"; + public static final String ALLELE_COUNT_INPUT_ARGUMENT_SHORT_NAME = "F"; + public static final String ALLELE_COUNT_INPUT_ARGUMENT_LONG_NAME = "allele-count-vcf"; + public static final String SAMPLE_NAME_ARGUMENT_LONG_NAME = "sample-name"; + public static final String COMPRESSION_LEVEL_ARGUMENT_LONG_NAME = "compression-level"; + + @Argument(shortName = PAIRED_END_FILE_ARGUMENT_SHORT_NAME, + fullName = PAIRED_END_FILE_ARGUMENT_LONG_NAME, doc = "Output file for paired end evidence", + optional=true) + public GATKPath peFile; + + @Argument(shortName = SPLIT_READ_FILE_ARGUMENT_SHORT_NAME, + fullName = SPLIT_READ_FILE_ARGUMENT_LONG_NAME, doc = "Output file for split read evidence", + optional=true) + public GATKPath srFile; + + @Argument(shortName = ALLELE_COUNT_OUTPUT_ARGUMENT_SHORT_NAME, + fullName = ALLELE_COUNT_OUTPUT_ARGUMENT_LONG_NAME, + doc = "Output file for allele counts", + optional = true) + public GATKPath alleleCountOutputFilename; + + @Argument(shortName = ALLELE_COUNT_INPUT_ARGUMENT_SHORT_NAME, + fullName = ALLELE_COUNT_INPUT_ARGUMENT_LONG_NAME, + doc = "Input VCF of SNPs marking loci for allele counts", + optional = true) + public GATKPath alleleCountInputFilename; + + @Argument(fullName = "allele-count-min-mapq", + doc = "minimum mapping quality for read to be allele-counted", + optional = true) + public int minMapQ = 30; + + @Argument(fullName = "allele-count-min-baseq", + doc = "minimum base call quality for SNP to be allele-counted", + optional = true) + public int minQ = 20; + + @Argument(fullName = SAMPLE_NAME_ARGUMENT_LONG_NAME, doc = "Sample name") + String sampleName = null; + + @Argument(fullName = COMPRESSION_LEVEL_ARGUMENT_LONG_NAME, doc = "Output compression level") + int compressionLevel = 4; + + final Set observedDiscordantNames = new HashSet<>(); + final PriorityQueue splitPosBuffer = new PriorityQueue<>(new SplitPosComparator()); + final List discordantPairs = new ArrayList<>(); + + int currentDiscordantPosition = -1; + String currentChrom = null; + + private FeatureSink peWriter; + private FeatureSink srWriter; + private AlleleCounter alleleCounter; + + private 
SAMSequenceDictionary sequenceDictionary; + + @Override + public boolean requiresReads() { + return true; + } + + @Override + public void onTraversalStart() { + super.onTraversalStart(); + + sequenceDictionary = getBestAvailableSequenceDictionary(); + final List sampleNames = Collections.singletonList(sampleName); + peWriter = createPEWriter(); + srWriter = createSRWriter(); + if ( alleleCountInputFilename != null && alleleCountOutputFilename != null ) { + alleleCounter = new AlleleCounter(sequenceDictionary, sampleNames, compressionLevel, + alleleCountInputFilename, alleleCountOutputFilename, + minMapQ, minQ); + } else if ( alleleCountInputFilename != null ) { + throw new UserException("Having specified an allele-count-vcf input, " + + "you must also supply an allele-count-file for output."); + } else if ( alleleCountOutputFilename != null ) { + throw new UserException("Having specified an allele-count-file for output, " + + "you must also supply an allele-count-vcf as input."); + } + if ( peWriter == null && srWriter == null && alleleCounter == null ) { + throw new UserException("You must supply at least one output file: PE, SR, or AC"); + } + } + + @Override + public List getDefaultReadFilters() { + final List readFilters = new ArrayList<>(super.getDefaultReadFilters()); + readFilters.add(ReadFilterLibrary.MAPPED); + readFilters.add(ReadFilterLibrary.NOT_DUPLICATE); + return readFilters; + } + + @Override + public void apply(final GATKRead read, final ReferenceContext referenceContext, final FeatureContext featureContext) { + if ( !(read.isPaired() && read.mateIsUnmapped()) && + !read.isSupplementaryAlignment() && + !read.isSecondaryAlignment() ) { + if ( srWriter != null && isSoftClipped(read) ) { + countSplitRead(read, splitPosBuffer, srWriter); + } + + if ( peWriter != null && !read.isProperlyPaired() ) { + reportDiscordantReadPair(read); + } + } + + if ( alleleCounter != null ) { + alleleCounter.apply(read); + } + } + + private FeatureSink createPEWriter() { + if ( peFile == null ) { + return null; + } + final String peFilename = peFile.toPath().toString(); + final List sampleNames = Collections.singletonList(sampleName); + final DiscordantPairEvidenceCodec peCodec = new DiscordantPairEvidenceCodec(); + final DiscordantPairEvidenceBCICodec peBCICodec = new DiscordantPairEvidenceBCICodec(); + if ( peBCICodec.canDecode(peFilename) ) { + return peBCICodec.makeSink(peFile, sequenceDictionary, sampleNames, compressionLevel); + } + if ( !peCodec.canDecode(peFilename) ) { + throw new UserException("Attempting to write discordant pair evidence to a file that " + + "can't be read as discordant pair evidence: " + peFilename + ". The file " + + "name should end with \".pe.txt\", \".pe.txt.gz\", or \".pe.bci\"."); + } + return peCodec.makeSink(peFile, sequenceDictionary, sampleNames, compressionLevel); + } + + private FeatureSink createSRWriter() { + if ( srFile == null ) { + return null; + } + final String srFilename = srFile.toPath().toString(); + final List sampleNames = Collections.singletonList(sampleName); + final SplitReadEvidenceCodec srCodec = new SplitReadEvidenceCodec(); + final SplitReadEvidenceBCICodec srBCICodec = new SplitReadEvidenceBCICodec(); + if ( srBCICodec.canDecode(srFilename) ) { + return srBCICodec.makeSink(srFile, sequenceDictionary, sampleNames, compressionLevel); + } + if ( !srCodec.canDecode(srFilename) ) { + throw new UserException("Attempting to write split read evidence to a file that " + + "can't be read as split read evidence: " + srFilename + ". 
The file " + + "name should end with \".sr.txt\", \".sr.txt.gz\", or \".sr.bci\"."); + } + return srCodec.makeSink(srFile, sequenceDictionary, sampleNames, compressionLevel); + } + + private void reportDiscordantReadPair(final GATKRead read) { + if (read.getStart() != currentDiscordantPosition) { + flushDiscordantReadPairs(); + currentDiscordantPosition = read.getStart(); + observedDiscordantNames.clear(); + } + + final DiscordantRead reportableDiscordantReadPair = getReportableDiscordantReadPair(read, observedDiscordantNames, + sequenceDictionary); + if (reportableDiscordantReadPair != null) { + discordantPairs.add(reportableDiscordantReadPair); + } + } + + @VisibleForTesting + public DiscordantRead getReportableDiscordantReadPair(final GATKRead read, final Set observedDiscordantNamesAtThisLocus, + final SAMSequenceDictionary samSequenceDictionary) { + final int readSeqId = samSequenceDictionary.getSequenceIndex(read.getContig()); + final int mateSeqId = samSequenceDictionary.getSequenceIndex(read.getMateContig()); + if (readSeqId < mateSeqId) { + return new DiscordantRead(read); + } else if (readSeqId == mateSeqId) { + if (read.getStart() < read.getMateStart()) { + return new DiscordantRead(read); + } else if (read.getStart() == read.getMateStart()) { + final boolean seenBefore = observedDiscordantNamesAtThisLocus.remove(read.getName()); + if (! seenBefore) { + final DiscordantRead discordantRead = new DiscordantRead(read); + observedDiscordantNamesAtThisLocus.add(read.getName()); + return discordantRead; + } + } + } + return null; + } + + private void flushDiscordantReadPairs() { + final Comparator discReadComparator = new DiscordantReadComparator(sequenceDictionary); + + discordantPairs.sort(discReadComparator); + discordantPairs.forEach(this::writeDiscordantPair); + discordantPairs.clear(); + } + + private void writeDiscordantPair(final DiscordantRead r) { + peWriter.write(new DiscordantPairEvidence(sampleName, + r.getContig(), r.getStart(), !r.isReadReverseStrand(), + r.getMateContig(), r.getMateStart(), !r.isMateReverseStrand())); + } + + /** + * Adds split read information about the current read to the counts in splitCounts. Flushes split read counts to + * srWriter if necessary. 
+ */ + @VisibleForTesting + public void countSplitRead(final GATKRead read, + final PriorityQueue splitCounts, + final FeatureSink srWriter ) { + final SplitPos splitPosition = getSplitPosition(read); + final int readStart = read.getStart(); + if (splitPosition.direction == POSITION.MIDDLE) { + return; + } + if (currentChrom == null) { + currentChrom = read.getContig(); + } else if (!currentChrom.equals(read.getContig())) { + flushSplitCounts(splitPos -> true, splitCounts, srWriter); + currentChrom = read.getContig(); + } else { + flushSplitCounts(sp -> (sp.pos < readStart - 1), splitCounts, srWriter); + } + + splitCounts.add(splitPosition); + } + + private void flushSplitCounts(final Predicate flushablePosition, + final PriorityQueue splitCounts, + final FeatureSink srWriter) { + + while (splitCounts.size() > 0 && flushablePosition.test(splitCounts.peek())) { + SplitPos pos = splitCounts.poll(); + int countAtPos = 1; + while (splitCounts.size() > 0 && splitCounts.peek().equals(pos)) { + countAtPos++; + splitCounts.poll(); + } + final SplitReadEvidence splitRead = new SplitReadEvidence(sampleName, currentChrom, pos.pos, countAtPos, pos.direction.equals(POSITION.RIGHT)); + srWriter.write(splitRead); + } + } + + private SplitPos getSplitPosition(GATKRead read) { + if (read.getCigar().getFirstCigarElement().getOperator() == CigarOperator.M) { + final int matchLength = read.getCigar().getCigarElements().stream().filter(e -> e.getOperator().consumesReferenceBases()).mapToInt(CigarElement::getLength).sum(); + return new SplitPos(read.getStart() + matchLength, POSITION.RIGHT); + } else if (read.getCigar().getLastCigarElement().getOperator() == CigarOperator.M) { + return new SplitPos(read.getStart(), POSITION.LEFT); + } + + return new SplitPos(-1, POSITION.MIDDLE); + } + + private boolean isSoftClipped(final GATKRead read) { + final CigarOperator firstOperator = read.getCigar().getFirstCigarElement().getOperator(); + final CigarOperator lastOperator = read.getCigar().getLastCigarElement().getOperator(); + return (firstOperator == CigarOperator.SOFT_CLIP && lastOperator != CigarOperator.SOFT_CLIP) || + (firstOperator != CigarOperator.SOFT_CLIP && lastOperator == CigarOperator.SOFT_CLIP); + } + + @Override + public Object onTraversalSuccess() { + flushSplitCounts(splitPos -> true, splitPosBuffer, srWriter); + flushDiscordantReadPairs(); + if ( alleleCounter != null ) { + alleleCounter.close(); + } + return null; + } + + @Override + public void closeTool() { + super.closeTool(); + if ( peWriter != null ) { + peWriter.close(); + } + if ( srWriter != null ) { + srWriter.close(); + } + } + + enum POSITION { + LEFT ("left"), + MIDDLE ("middle"), + RIGHT ("right"); + + private String description; + + POSITION(final String description) { + this.description = description; + } + + public String getDescription() { + return description; + } + } + + @VisibleForTesting final static class DiscordantRead { + private boolean readReverseStrand; + private boolean mateReverseStrand; + private String contig; + private int start; + private String mateContig; + private int mateStart; + private String name; + + public DiscordantRead(final GATKRead read) { + this.readReverseStrand = read.isReverseStrand(); + this.mateReverseStrand = read.mateIsReverseStrand(); + this.contig = read.getContig(); + this.start = read.getStart(); + this.mateContig = read.getMateContig(); + this.mateStart = read.getMateStart(); + this.name = read.getName(); + } + + public boolean isReadReverseStrand() { + return readReverseStrand; + } + + 
public void setReadReverseStrand(final boolean readReverseStrand) { + this.readReverseStrand = readReverseStrand; + } + + public boolean isMateReverseStrand() { + return mateReverseStrand; + } + + public void setMateReverseStrand(final boolean mateReverseStrand) { + this.mateReverseStrand = mateReverseStrand; + } + + public String getContig() { + return contig; + } + + public void setContig(final String contig) { + this.contig = contig; + } + + public int getStart() { + return start; + } + + public void setStart(final int start) { + this.start = start; + } + + public String getMateContig() { + return mateContig; + } + + public void setMateContig(final String mateContig) { + this.mateContig = mateContig; + } + + public int getMateStart() { + return mateStart; + } + + public void setMateStart(final int mateStart) { + this.mateStart = mateStart; + } + + public String getName() { + return name; + } + + public void setName(final String name) { + this.name = name; + } + + @Override + public boolean equals(final Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + final DiscordantRead that = (DiscordantRead) o; + + if (readReverseStrand != that.readReverseStrand) return false; + if (mateReverseStrand != that.mateReverseStrand) return false; + if (start != that.start) return false; + if (mateStart != that.mateStart) return false; + if (contig != null ? !contig.equals(that.contig) : that.contig != null) return false; + if (mateContig != null ? !mateContig.equals(that.mateContig) : that.mateContig != null) return false; + return name != null ? name.equals(that.name) : that.name == null; + } + + @Override + public int hashCode() { + int result = (readReverseStrand ? 1 : 0); + result = 31 * result + (mateReverseStrand ? 1 : 0); + result = 31 * result + (contig != null ? contig.hashCode() : 0); + result = 31 * result + start; + result = 31 * result + (mateContig != null ? mateContig.hashCode() : 0); + result = 31 * result + mateStart; + result = 31 * result + (name != null ? name.hashCode() : 0); + return result; + } + } + + @VisibleForTesting final static class SplitPos { + public POSITION direction; + public int pos; + + public SplitPos(final int start, final POSITION direction) { + this.pos = start; + this.direction = direction; + } + + @Override + public boolean equals(final Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + final SplitPos splitPos = (SplitPos) o; + + if (pos != splitPos.pos) return false; + return direction.ordinal() == splitPos.direction.ordinal(); + } + + @Override + public int hashCode() { + int result = direction != null ? 
direction.ordinal() : 0; + result = 31 * result + pos; + return result; + } + } + + @VisibleForTesting final static class SplitPosComparator implements Comparator { + @Override + public int compare(final SplitPos o1, final SplitPos o2) { + if (o1.pos != o2.pos) { + return Integer.compare(o1.pos, o2.pos); + } else { + return o1.direction.compareTo(o2.direction); + } + } + } + + @VisibleForTesting final static class DiscordantReadComparator implements Comparator { + + private final Comparator internalComparator; + + public DiscordantReadComparator(final SAMSequenceDictionary sequenceDictionary) { + internalComparator = Comparator.comparing((DiscordantRead r) -> sequenceDictionary.getSequenceIndex(r.getContig())) + .thenComparing(DiscordantRead::getStart) + .thenComparing(DiscordantRead::isReadReverseStrand) + .thenComparing((DiscordantRead r) -> sequenceDictionary.getSequenceIndex(r.getMateContig())) + .thenComparing(DiscordantRead::getMateStart) + .thenComparing(DiscordantRead::isMateReverseStrand); + + } + + @Override + public int compare(final DiscordantRead o1, final DiscordantRead o2) { + return internalComparator.compare(o1, o2); + } + } + + @VisibleForTesting + final static class AlleleCounter { + private final SAMSequenceDictionary dict; + private final FeatureSink writer; + private final int minMapQ; + private final int minQ; + private final Iterator snpSourceItr; + private final Deque locusDepthQueue; + + public AlleleCounter( final SAMSequenceDictionary dict, + final List sampleNames, + final int compressionLevel, + final GATKPath inputPath, + final GATKPath outputPath, + final int minMapQ, + final int minQ ) { + this.dict = dict; + final String outputFilename = outputPath.toPath().toString(); + final LocusDepthBCICodec bciCodec = new LocusDepthBCICodec(); + if ( bciCodec.canDecode(outputFilename) ) { + this.writer = bciCodec.makeSink(outputPath, dict, sampleNames, compressionLevel); + } else { + final LocusDepthCodec codec = new LocusDepthCodec(); + if ( !codec.canDecode(outputFilename) ) { + throw new UserException("Attempting to write locus depth evidence to a file that " + + "can't be read as locus depth evidence: " + outputFilename + ". 
The file " + + "name should end with \".ld.txt\", \".ld.txt.gz\", or \".ld.bci\"."); + } + this.writer = codec.makeSink(outputPath, dict, sampleNames, compressionLevel); + } + this.minMapQ = minMapQ; + this.minQ = minQ; + final FeatureDataSource snpSource = + new FeatureDataSource<>(inputPath.toPath().toString()); + dict.assertSameDictionary(snpSource.getSequenceDictionary()); + this.snpSourceItr = snpSource.iterator(); + this.locusDepthQueue = new ArrayDeque<>(100); + readNextLocus(); + } + + public void apply( final GATKRead read ) { + if ( read.getMappingQuality() < minMapQ || locusDepthQueue.isEmpty() ) { + return; + } + + // clean queue of LocusCounts that precede the current read + final SimpleInterval readLoc = + new SimpleInterval(read.getContig(), read.getStart(), read.getEnd()); + while ( true ) { + final LocusDepth locusDepth = locusDepthQueue.getFirst(); + if ( compareLocus(locusDepth.getContig(), locusDepth.getStart(), readLoc) >= 0 ) { + break; + } + writer.write(locusDepthQueue.removeFirst()); + if ( locusDepthQueue.isEmpty() ) { + if ( !readNextLocus() ) { + return; + } + } + } + + // make sure that the last LocusCount in the queue occurs after the current read + // if such a LocusCount is available + while ( true ) { + final LocusDepth locusDepth = locusDepthQueue.getLast(); + if ( compareLocus(locusDepth.getContig(), locusDepth.getStart(), readLoc) > 0 || + !readNextLocus() ) { + break; + } + } + + walkReadMatches(read); + } + + public void walkReadMatches( final GATKRead read ) { + int opStart = read.getStart(); + int readIdx = 0; + final byte[] calls = read.getBasesNoCopy(); + final byte[] quals = read.getBaseQualitiesNoCopy(); + for ( final CigarElement cigEle : read.getCigar().getCigarElements() ) { + final int eleLen = cigEle.getLength(); + final CigarOperator cigOp = cigEle.getOperator(); + if ( cigOp.isAlignment() ) { + final int opEnd = opStart + eleLen - 1; + final SimpleInterval opLoc = + new SimpleInterval(read.getContig(), opStart, opEnd); + for ( final LocusDepth locusDepth : locusDepthQueue ) { + final int cmp = + compareLocus(locusDepth.getContig(), locusDepth.getStart(), opLoc); + if ( cmp > 0 ) { + break; + } + // don't count base calls that aren't really part of the template + // (if the template is shorter than the read, we can call into adaptor sequence) + if ( cmp == 0 && !isBaseInsideAdaptor(read, locusDepth.getStart()) ) { + final int callIdx = readIdx + locusDepth.getStart() - opStart; + if ( quals[callIdx] < minQ ) { + continue; + } + final Nucleotide call = Nucleotide.decode(calls[callIdx]); + if ( call.isStandard() ) { + locusDepth.observe(call.ordinal()); + } + } + } + } + if ( cigOp.consumesReadBases() ) { + readIdx += eleLen; + } + if ( cigOp.consumesReferenceBases() ) { + opStart += eleLen; + } + } + } + + public void close() { + while ( !locusDepthQueue.isEmpty() ) { + writer.write(locusDepthQueue.removeFirst()); + } + writer.close(); + } + + private int compareLocus( final String contig, final int position, final Locatable loc ) { + int cmp = Integer.compare(dict.getSequenceIndex(contig), dict.getSequenceIndex(loc.getContig())); + if ( cmp == 0 ) { + if ( position < loc.getStart() ) { + cmp = -1; + } else if ( position > loc.getEnd() ) { + cmp = 1; + } + } + return cmp; + } + + private boolean readNextLocus() { + if ( !snpSourceItr.hasNext() ) { + return false; + } + VariantContext snp = snpSourceItr.next(); + while ( !snp.isSNP() ) { + if ( !snpSourceItr.hasNext() ) { + return false; + } + snp = snpSourceItr.next(); + } + final byte[] 
refSeq = snp.getReference().getBases(); + final LocusDepth locusDepth = new LocusDepth(snp, refSeq[0]); + locusDepthQueue.add(locusDepth); + return true; + } + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/PairedEndAndSplitReadEvidenceCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/PairedEndAndSplitReadEvidenceCollection.java deleted file mode 100644 index 3f5b2a6115b..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/PairedEndAndSplitReadEvidenceCollection.java +++ /dev/null @@ -1,437 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.sv; - -import com.google.common.annotations.VisibleForTesting; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SAMSequenceDictionary; -import org.broadinstitute.barclay.argparser.Argument; -import org.broadinstitute.barclay.argparser.BetaFeature; -import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; -import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup; -import org.broadinstitute.hellbender.engine.FeatureContext; -import org.broadinstitute.hellbender.engine.GATKPath; -import org.broadinstitute.hellbender.engine.ReadWalker; -import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.engine.filters.ReadFilter; -import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary; -import org.broadinstitute.hellbender.tools.sv.DiscordantPairEvidence; -import org.broadinstitute.hellbender.tools.sv.SplitReadEvidence; -import org.broadinstitute.hellbender.utils.codecs.DiscordantPairEvidenceCodec; -import org.broadinstitute.hellbender.utils.codecs.SplitReadEvidenceCodec; -import org.broadinstitute.hellbender.utils.io.FeatureOutputStream; -import org.broadinstitute.hellbender.utils.read.GATKRead; - -import java.util.*; -import java.util.function.Predicate; - -/** - * Creates discordant read pair and split read evidence files for use in the GATK-SV pipeline. - * - * This tool emulates the functionality of the "svtk collect-pesr" used in v1 of the GATK-SV pipeline. - * The first output file is a tab-delimited file containing information on discordant read pairs in the - * input cram, with the following columns: - * - *
- * <ul>
- *     <li>read contig</li>
- *     <li>read start</li>
- *     <li>read strand</li>
- *     <li>mate contig</li>
- *     <li>mate start</li>
- *     <li>mate strand</li>
- *     <li>sample name</li>
- * </ul>
- *
- * Only one record is emitted for each discordant read pair, at the read in the pair with the "upstream" start
- * position according to the sequence dictionary contig ordering and coordinate.
- *
- * The second file contains the locations of all split read clippings in the input bam or cram, with the
- * following columns:
- *
- * <ul>
- *     <li>contig</li>
- *     <li>clipping position</li>
- *     <li>direction: side of the read that was clipped (either "left" or "right")</li>
- *     <li>count: the number of reads clipped at this location in this direction</li>
- *     <li>sample name</li>
- * </ul>
- *
    - */ -@BetaFeature -@CommandLineProgramProperties( - summary = "Gathers paired-end and split read evidence files for use in the GATK-SV pipeline. Output files " + - "are a file containing the location of and orientation of read pairs marked as discordant, and a " + - "file containing the clipping location of all soft clipped reads and the orientation of the clipping.", - oneLineSummary = "Gathers paired-end and split read evidence files for use in the GATK-SV pipeline.", - programGroup = StructuralVariantDiscoveryProgramGroup.class -) -public class PairedEndAndSplitReadEvidenceCollection extends ReadWalker { - - public static final String PAIRED_END_FILE_ARGUMENT_SHORT_NAME = "PE"; - public static final String PAIRED_END_FILE_ARGUMENT_LONG_NAME = "pe-file"; - public static final String SPLIT_READ_FILE_ARGUMENT_SHORT_NAME = "SR"; - public static final String SPLIT_READ_FILE_ARGUMENT_LONG_NAME = "sr-file"; - public static final String SAMPLE_NAME_ARGUMENT_LONG_NAME = "sample-name"; - public static final String COMPRESSION_LEVEL_ARGUMENT_LONG_NAME = "compression-level"; - - @Argument(shortName = PAIRED_END_FILE_ARGUMENT_SHORT_NAME, fullName = PAIRED_END_FILE_ARGUMENT_LONG_NAME, doc = "Output file for paired end evidence", optional=false) - public GATKPath peFile; - - @Argument(shortName = SPLIT_READ_FILE_ARGUMENT_SHORT_NAME, fullName = SPLIT_READ_FILE_ARGUMENT_LONG_NAME, doc = "Output file for split read evidence", optional=false) - public GATKPath srFile; - - @Argument(fullName = SAMPLE_NAME_ARGUMENT_LONG_NAME, doc = "Sample name") - String sampleName = null; - - @Argument(fullName = COMPRESSION_LEVEL_ARGUMENT_LONG_NAME, doc = "Output compression level") - int compressionLevel = 4; - - final Set observedDiscordantNames = new HashSet<>(); - final PriorityQueue splitPosBuffer = new PriorityQueue<>(new SplitPosComparator()); - final List discordantPairs = new ArrayList<>(); - - int currentDiscordantPosition = -1; - String currentChrom = null; - - private FeatureOutputStream peWriter; - private FeatureOutputStream srWriter; - - private SAMSequenceDictionary sequenceDictionary; - - @Override - public boolean requiresReads() { - return true; - } - - - @Override - public void onTraversalStart() { - super.onTraversalStart(); - sequenceDictionary = getBestAvailableSequenceDictionary(); - peWriter = new FeatureOutputStream<>(peFile, new DiscordantPairEvidenceCodec(), DiscordantPairEvidenceCodec::encode, sequenceDictionary, compressionLevel); - srWriter = new FeatureOutputStream<>(srFile, new SplitReadEvidenceCodec(), SplitReadEvidenceCodec::encode, sequenceDictionary, compressionLevel); - } - - @Override - public List getDefaultReadFilters() { - final List readFilters = new ArrayList<>(super.getDefaultReadFilters()); - readFilters.add(ReadFilterLibrary.MATE_UNMAPPED_AND_UNMAPPED_READ_FILTER); - readFilters.add(ReadFilterLibrary.NOT_DUPLICATE); - readFilters.add(ReadFilterLibrary.NOT_SECONDARY_ALIGNMENT); - readFilters.add(ReadFilterLibrary.NOT_SUPPLEMENTARY_ALIGNMENT); - return readFilters; - } - - @Override - public void apply(final GATKRead read, final ReferenceContext referenceContext, final FeatureContext featureContext) { - if (isSoftClipped(read)) { - countSplitRead(read, splitPosBuffer, srWriter); - } - - if (! 
read.isProperlyPaired()) { - reportDiscordantReadPair(read); - } - } - - private void reportDiscordantReadPair(final GATKRead read) { - if (read.getStart() != currentDiscordantPosition) { - flushDiscordantReadPairs(); - currentDiscordantPosition = read.getStart(); - observedDiscordantNames.clear(); - } - - final DiscordantRead reportableDiscordantReadPair = getReportableDiscordantReadPair(read, observedDiscordantNames, - sequenceDictionary); - if (reportableDiscordantReadPair != null) { - discordantPairs.add(reportableDiscordantReadPair); - } - } - - @VisibleForTesting - public DiscordantRead getReportableDiscordantReadPair(final GATKRead read, final Set observedDiscordantNamesAtThisLocus, - final SAMSequenceDictionary samSequenceDictionary) { - final int readSeqId = samSequenceDictionary.getSequenceIndex(read.getContig()); - final int mateSeqId = samSequenceDictionary.getSequenceIndex(read.getMateContig()); - if (readSeqId < mateSeqId) { - return new DiscordantRead(read); - } else if (readSeqId == mateSeqId) { - if (read.getStart() < read.getMateStart()) { - return new DiscordantRead(read); - } else if (read.getStart() == read.getMateStart()) { - final boolean seenBefore = observedDiscordantNamesAtThisLocus.remove(read.getName()); - if (! seenBefore) { - final DiscordantRead discordantRead = new DiscordantRead(read); - observedDiscordantNamesAtThisLocus.add(read.getName()); - return discordantRead; - } - } - } - return null; - } - - private void flushDiscordantReadPairs() { - final Comparator discReadComparator = new DiscordantReadComparator(sequenceDictionary); - - discordantPairs.sort(discReadComparator); - discordantPairs.forEach(this::writeDiscordantPair); - discordantPairs.clear(); - } - - private void writeDiscordantPair(final DiscordantRead r) { - final boolean strandA = !r.isReadReverseStrand(); - final boolean strandB = !r.isMateReverseStrand(); - - final DiscordantPairEvidence discordantPair = new DiscordantPairEvidence(sampleName, r.getContig(), r.getStart(), strandA, r.getMateContig(), r.getMateStart(), strandB); - peWriter.add(discordantPair); - } - - /** - * Adds split read information about the current read to the counts in splitCounts. Flushes split read counts to - * srWriter if necessary. 
- */ - @VisibleForTesting - public void countSplitRead(final GATKRead read, final PriorityQueue splitCounts, final FeatureOutputStream srWriter) { - final SplitPos splitPosition = getSplitPosition(read); - final int readStart = read.getStart(); - if (splitPosition.direction == POSITION.MIDDLE) { - return; - } - if (currentChrom == null) { - currentChrom = read.getContig(); - } else if (!currentChrom.equals(read.getContig())) { - flushSplitCounts(splitPos -> true, srWriter, splitCounts); - currentChrom = read.getContig(); - } else { - flushSplitCounts(sp -> (sp.pos < readStart - 1), srWriter, splitCounts); - } - - splitCounts.add(splitPosition); - } - - private void flushSplitCounts(final Predicate flushablePosition, final FeatureOutputStream srWriter, final PriorityQueue splitCounts) { - - while (splitCounts.size() > 0 && flushablePosition.test(splitCounts.peek())) { - SplitPos pos = splitCounts.poll(); - int countAtPos = 1; - while (splitCounts.size() > 0 && splitCounts.peek().equals(pos)) { - countAtPos++; - splitCounts.poll(); - } - final SplitReadEvidence splitRead = new SplitReadEvidence(sampleName, currentChrom, pos.pos, countAtPos, pos.direction.equals(POSITION.RIGHT)); - srWriter.add(splitRead); - } - } - - private SplitPos getSplitPosition(GATKRead read) { - if (read.getCigar().getFirstCigarElement().getOperator() == CigarOperator.M) { - final int matchLength = read.getCigar().getCigarElements().stream().filter(e -> e.getOperator().consumesReferenceBases()).mapToInt(CigarElement::getLength).sum(); - return new SplitPos(read.getStart() + matchLength, POSITION.RIGHT); - } else if (read.getCigar().getLastCigarElement().getOperator() == CigarOperator.M) { - return new SplitPos(read.getStart(), POSITION.LEFT); - } - - return new SplitPos(-1, POSITION.MIDDLE); - } - - private boolean isSoftClipped(final GATKRead read) { - final CigarOperator firstOperator = read.getCigar().getFirstCigarElement().getOperator(); - final CigarOperator lastOperator = read.getCigar().getLastCigarElement().getOperator(); - return (firstOperator == CigarOperator.SOFT_CLIP && lastOperator != CigarOperator.SOFT_CLIP) || - (firstOperator != CigarOperator.SOFT_CLIP && lastOperator == CigarOperator.SOFT_CLIP); - } - - @Override - public Object onTraversalSuccess() { - flushSplitCounts(splitPos -> true, srWriter, splitPosBuffer); - flushDiscordantReadPairs(); - return null; - } - - @Override - public void closeTool() { - super.closeTool(); - if (peWriter != null) { - peWriter.close(); - } - if (srWriter != null) { - srWriter.close(); - } - } - - enum POSITION { - LEFT ("left"), - MIDDLE ("middle"), - RIGHT ("right"); - - private String description; - - POSITION(final String description) { - this.description = description; - } - - public String getDescription() { - return description; - } - } - - @VisibleForTesting final static class DiscordantRead { - private boolean readReverseStrand; - private boolean mateReverseStrand; - private String contig; - private int start; - private String mateContig; - private int mateStart; - private String name; - - public DiscordantRead(final GATKRead read) { - this.readReverseStrand = read.isReverseStrand(); - this.mateReverseStrand = read.mateIsReverseStrand(); - this.contig = read.getContig(); - this.start = read.getStart(); - this.mateContig = read.getMateContig(); - this.mateStart = read.getMateStart(); - this.name = read.getName(); - } - - public boolean isReadReverseStrand() { - return readReverseStrand; - } - - public void setReadReverseStrand(final boolean readReverseStrand) 
{ - this.readReverseStrand = readReverseStrand; - } - - public boolean isMateReverseStrand() { - return mateReverseStrand; - } - - public void setMateReverseStrand(final boolean mateReverseStrand) { - this.mateReverseStrand = mateReverseStrand; - } - - public String getContig() { - return contig; - } - - public void setContig(final String contig) { - this.contig = contig; - } - - public int getStart() { - return start; - } - - public void setStart(final int start) { - this.start = start; - } - - public String getMateContig() { - return mateContig; - } - - public void setMateContig(final String mateContig) { - this.mateContig = mateContig; - } - - public int getMateStart() { - return mateStart; - } - - public void setMateStart(final int mateStart) { - this.mateStart = mateStart; - } - - public String getName() { - return name; - } - - public void setName(final String name) { - this.name = name; - } - - @Override - public boolean equals(final Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - final DiscordantRead that = (DiscordantRead) o; - - if (readReverseStrand != that.readReverseStrand) return false; - if (mateReverseStrand != that.mateReverseStrand) return false; - if (start != that.start) return false; - if (mateStart != that.mateStart) return false; - if (contig != null ? !contig.equals(that.contig) : that.contig != null) return false; - if (mateContig != null ? !mateContig.equals(that.mateContig) : that.mateContig != null) return false; - return name != null ? name.equals(that.name) : that.name == null; - } - - @Override - public int hashCode() { - int result = (readReverseStrand ? 1 : 0); - result = 31 * result + (mateReverseStrand ? 1 : 0); - result = 31 * result + (contig != null ? contig.hashCode() : 0); - result = 31 * result + start; - result = 31 * result + (mateContig != null ? mateContig.hashCode() : 0); - result = 31 * result + mateStart; - result = 31 * result + (name != null ? name.hashCode() : 0); - return result; - } - } - - @VisibleForTesting final static class SplitPos { - public POSITION direction; - public int pos; - - public SplitPos(final int start, final POSITION direction) { - this.pos = start; - this.direction = direction; - } - - @Override - public boolean equals(final Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - final SplitPos splitPos = (SplitPos) o; - - if (pos != splitPos.pos) return false; - return direction.ordinal() == splitPos.direction.ordinal(); - } - - @Override - public int hashCode() { - int result = direction != null ? 
direction.ordinal() : 0; - result = 31 * result + pos; - return result; - } - } - - @VisibleForTesting final static class SplitPosComparator implements Comparator { - @Override - public int compare(final SplitPos o1, final SplitPos o2) { - if (o1.pos != o2.pos) { - return Integer.compare(o1.pos, o2.pos); - } else { - return o1.direction.compareTo(o2.direction); - } - } - } - - @VisibleForTesting final static class DiscordantReadComparator implements Comparator { - - private final Comparator internalComparator; - - public DiscordantReadComparator(final SAMSequenceDictionary sequenceDictionary) { - internalComparator = Comparator.comparing((DiscordantRead r) -> sequenceDictionary.getSequenceIndex(r.getContig())) - .thenComparing(DiscordantRead::getStart) - .thenComparing(DiscordantRead::isReadReverseStrand) - .thenComparing((DiscordantRead r) -> sequenceDictionary.getSequenceIndex(r.getMateContig())) - .thenComparing(DiscordantRead::getMateStart) - .thenComparing(DiscordantRead::isMateReverseStrand); - - } - - @Override - public int compare(final DiscordantRead o1, final DiscordantRead o2) { - return internalComparator.compare(o1, o2); - } - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVInterval.java b/src/main/java/org/broadinstitute/hellbender/utils/SVInterval.java similarity index 97% rename from src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVInterval.java rename to src/main/java/org/broadinstitute/hellbender/utils/SVInterval.java index 5b1480763ab..bd0fda94318 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVInterval.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/SVInterval.java @@ -1,4 +1,4 @@ -package org.broadinstitute.hellbender.tools.spark.sv.utils; +package org.broadinstitute.hellbender.utils; import com.esotericsoftware.kryo.DefaultSerializer; import com.esotericsoftware.kryo.Kryo; @@ -6,8 +6,7 @@ import com.esotericsoftware.kryo.io.Output; import com.google.common.annotations.VisibleForTesting; import org.broadinstitute.hellbender.exceptions.GATKException; -import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.tools.spark.sv.utils.SVLocation; /** * Naturally collating, simple interval diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalTree.java b/src/main/java/org/broadinstitute/hellbender/utils/SVIntervalTree.java similarity index 99% rename from src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalTree.java rename to src/main/java/org/broadinstitute/hellbender/utils/SVIntervalTree.java index 1a182fc5755..3f0faede820 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalTree.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/SVIntervalTree.java @@ -1,4 +1,4 @@ -package org.broadinstitute.hellbender.tools.spark.sv.utils; +package org.broadinstitute.hellbender.utils; import com.esotericsoftware.kryo.DefaultSerializer; import com.esotericsoftware.kryo.Kryo; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/AbstractBCICodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/AbstractBCICodec.java new file mode 100644 index 00000000000..83bcdf2762b --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/AbstractBCICodec.java @@ -0,0 +1,42 @@ +package org.broadinstitute.hellbender.utils.codecs; + +import 
htsjdk.samtools.util.LocationAware; +import htsjdk.tribble.Feature; +import htsjdk.tribble.FeatureCodec; +import htsjdk.tribble.FeatureCodecHeader; +import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Writer; + +import java.io.IOException; +import java.io.InputStream; + +public abstract class AbstractBCICodec + implements FeatureOutputCodec>, FeatureCodec> { + + @Override + public Feature decodeLoc( final Reader reader ) throws IOException { + return decode(reader); + } + + @Override + public FeatureCodecHeader readHeader( final Reader reader ) throws IOException { + return reader.getFeatureCodecHeader(); + } + + @Override + public Reader makeSourceFromStream( final InputStream is ) { + throw new GATKException("unimplemented method"); + } + + @Override + public LocationAware makeIndexableSourceFromStream( final InputStream is ) { + throw new GATKException("unimplemented method"); + } + + @Override + public boolean isDone( final Reader reader ) { return !reader.hasNext(); } + + @Override + public void close( final Reader reader ) { reader.close(); } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/BafEvidenceBCICodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/BafEvidenceBCICodec.java new file mode 100644 index 00000000000..e9ab22bc581 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/BafEvidenceBCICodec.java @@ -0,0 +1,66 @@ +package org.broadinstitute.hellbender.utils.codecs; + +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.tribble.Feature; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.sv.BafEvidence; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Writer; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.List; + +public class BafEvidenceBCICodec extends AbstractBCICodec { + private boolean versionChecked = false; + private static final String BAF_BCI_FILE_EXTENSION = ".baf.bci"; + + @Override + public BafEvidence decode( final Reader reader ) throws IOException { + if ( !versionChecked ) { + if ( !BafEvidence.BCI_VERSION.equals(reader.getVersion()) ) { + throw new UserException("baf.bci file has wrong version: expected " + + BafEvidence.BCI_VERSION + " but found " + reader.getVersion()); + } + versionChecked = true; + } + final DataInputStream dis = reader.getStream(); + final String sample = reader.getSampleNames().get(dis.readInt()); + final String contig = reader.getDictionary().getSequence(dis.readInt()).getSequenceName(); + final int position = dis.readInt(); + final double value = dis.readDouble(); + return new BafEvidence(sample, contig, position, value); + } + + @Override + public Class getFeatureType() { return BafEvidence.class; } + + @Override + public boolean canDecode( final String path ) { + return path.toLowerCase().endsWith(BAF_BCI_FILE_EXTENSION); + } + + @Override + public Writer makeSink( final GATKPath path, + final SAMSequenceDictionary dict, + final List sampleNames, + final int compressionLevel ) { + final String className = 
BafEvidence.class.getSimpleName(); + return new Writer<>(path, + new FeaturesHeader(className, BafEvidence.BCI_VERSION, dict, sampleNames), + this::encode, + compressionLevel); + } + + @Override + public void encode( final BafEvidence bafEvidence, final Writer writer ) throws IOException { + final DataOutputStream dos = writer.getStream(); + dos.writeInt(writer.getSampleIndex(bafEvidence.getSample())); + dos.writeInt(writer.getContigIndex(bafEvidence.getContig())); + dos.writeInt(bafEvidence.getStart()); + dos.writeDouble(bafEvidence.getValue()); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/BafEvidenceCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/BafEvidenceCodec.java index 89284c1f9e2..ea70b1aac1a 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/BafEvidenceCodec.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/BafEvidenceCodec.java @@ -1,16 +1,20 @@ package org.broadinstitute.hellbender.utils.codecs; import com.google.common.base.Splitter; +import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.readers.LineIterator; +import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.tools.sv.BafEvidence; +import org.broadinstitute.hellbender.utils.io.FeatureOutputStream; import java.util.Arrays; import java.util.List; -public class BafEvidenceCodec extends AsciiFeatureCodec { +public class BafEvidenceCodec extends AsciiFeatureCodec + implements FeatureOutputCodec> { public static final String FORMAT_SUFFIX = ".baf.txt"; public static final String COL_DELIMITER = "\t"; @@ -40,19 +44,31 @@ public BafEvidence decode(final String line) { @Override public boolean canDecode(final String path) { - final String toDecode; - if (IOUtil.hasBlockCompressedExtension(path)) { - toDecode = path.substring(0, path.lastIndexOf(".")); - } else { - toDecode = path; + String toDecode = path.toLowerCase(); + if ( IOUtil.hasBlockCompressedExtension(toDecode) ) { + toDecode = toDecode.substring(0, toDecode.lastIndexOf('.')); } - return toDecode.toLowerCase().endsWith(FORMAT_SUFFIX); + return toDecode.endsWith(FORMAT_SUFFIX); } @Override public Object readActualHeader(final LineIterator reader) { return null; } - public static String encode(final BafEvidence ev) { + @Override + public FeatureOutputStream makeSink( final GATKPath path, + final SAMSequenceDictionary dict, + final List sampleNames, + final int compressionLevel ) { + return new FeatureOutputStream<>(path, getTabixFormat(), BafEvidenceCodec::encode, + dict, compressionLevel); + } + + @Override + public void encode( final BafEvidence ev, final FeatureOutputStream os ) { + os.write(ev); + } + + public static String encode( final BafEvidence ev ) { final List columns = Arrays.asList( ev.getContig(), Integer.toString(ev.getStart() - 1), diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/DepthEvidenceBCICodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/DepthEvidenceBCICodec.java new file mode 100644 index 00000000000..61ec20dc7eb --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/DepthEvidenceBCICodec.java @@ -0,0 +1,73 @@ +package org.broadinstitute.hellbender.utils.codecs; + +import htsjdk.samtools.SAMSequenceDictionary; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.exceptions.UserException; 
+import org.broadinstitute.hellbender.tools.sv.DepthEvidence; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Writer; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.List; + +public class DepthEvidenceBCICodec extends AbstractBCICodec { + private boolean versionChecked = false; + private static final String RD_BCI_FILE_EXTENSION = ".rd.bci"; + + @Override + public DepthEvidence decode( final Reader reader ) throws IOException { + if ( !versionChecked ) { + if ( !DepthEvidence.BCI_VERSION.equals(reader.getVersion()) ) { + throw new UserException("baf.bci file has wrong version: expected " + + DepthEvidence.BCI_VERSION + " but found " + reader.getVersion()); + } + versionChecked = true; + } + final DataInputStream dis = reader.getStream(); + final String contig = reader.getDictionary().getSequence(dis.readInt()).getSequenceName(); + final int start = dis.readInt(); + final int end = dis.readInt(); + final int nCounts = dis.readInt(); + final int[] counts = new int[nCounts]; + for ( int idx = 0; idx != nCounts; ++idx ) { + counts[idx] = dis.readInt(); + } + return new DepthEvidence(contig, start, end, counts); + } + + @Override + public Class getFeatureType() { return DepthEvidence.class; } + + @Override + public boolean canDecode( final String path ) { + return path.toLowerCase().endsWith(RD_BCI_FILE_EXTENSION); + } + + @Override + public Writer makeSink( final GATKPath path, + final SAMSequenceDictionary dict, + final List sampleNames, + final int compressionLevel ) { + final String className = DepthEvidence.class.getSimpleName(); + return new Writer<>(path, + new FeaturesHeader(className, DepthEvidence.BCI_VERSION, dict, sampleNames), + this::encode, + compressionLevel); + } + + @Override + public void encode( final DepthEvidence depthEvidence, + final Writer writer ) throws IOException { + final DataOutputStream dos = writer.getStream(); + dos.writeInt(writer.getContigIndex(depthEvidence.getContig())); + dos.writeInt(depthEvidence.getStart()); + dos.writeInt(depthEvidence.getEnd()); + final int[] counts = depthEvidence.getCounts(); + dos.writeInt(counts.length); + for ( final int count : counts ) { + dos.writeInt(count); + } + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/DepthEvidenceCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/DepthEvidenceCodec.java index 62843016a7d..e6ef1bea939 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/DepthEvidenceCodec.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/DepthEvidenceCodec.java @@ -1,17 +1,21 @@ package org.broadinstitute.hellbender.utils.codecs; import com.google.common.base.Splitter; +import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.readers.LineIterator; +import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.sv.DepthEvidence; +import org.broadinstitute.hellbender.utils.io.FeatureOutputStream; import java.util.ArrayList; import java.util.List; -public class DepthEvidenceCodec extends AsciiFeatureCodec { +public class DepthEvidenceCodec extends AsciiFeatureCodec + implements FeatureOutputCodec> { public static final String 
FORMAT_SUFFIX = ".rd.txt"; public static final String COL_DELIMITER = "\t"; @@ -28,6 +32,9 @@ public TabixFormat getTabixFormat() { @Override public DepthEvidence decode(final String line) { + if ( line.startsWith("#Chr") ) { + return null; + } final List tokens = splitter.splitToList(line); if (tokens.size() < 3) { throw new IllegalArgumentException("Expected at least 3 columns but found " + tokens.size()); @@ -45,13 +52,11 @@ public DepthEvidence decode(final String line) { @Override public boolean canDecode(final String path) { - final String toDecode; - if (IOUtil.hasBlockCompressedExtension(path)) { - toDecode = path.substring(0, path.lastIndexOf(".")); - } else { - toDecode = path; + String toDecode = path.toLowerCase(); + if ( IOUtil.hasBlockCompressedExtension(toDecode) ) { + toDecode = toDecode.substring(0, toDecode.lastIndexOf('.')); } - return toDecode.toLowerCase().endsWith(FORMAT_SUFFIX); + return toDecode.endsWith(FORMAT_SUFFIX); } @Override @@ -59,7 +64,33 @@ public Object readActualHeader(final LineIterator reader) { if (!reader.hasNext()) { throw new UserException.BadInput("Depth evidence file did not have a header line"); } - return reader.next(); + final List headerCols = splitter.splitToList(reader.next()); + return new FeaturesHeader(DepthEvidence.class.getSimpleName(), "unknown", null, + headerCols.subList(3, headerCols.size())); + } + + @Override + public FeatureOutputStream makeSink( final GATKPath path, + final SAMSequenceDictionary dict, + final List sampleNames, + final int compressionLevel ) { + final FeatureOutputStream foStream = + new FeatureOutputStream<>(path, + getTabixFormat(), + DepthEvidenceCodec::encode, + dict, + compressionLevel); + final StringBuilder sb = new StringBuilder("#Chr\tStart\tEnd"); + for ( final String sampleName : sampleNames ) { + sb.append('\t').append(sampleName); + } + foStream.writeHeader(sb.toString()); + return foStream; + } + + @Override + public void encode( final DepthEvidence ev, final FeatureOutputStream os ) { + os.write(ev); } public static String encode(final DepthEvidence ev) { @@ -69,8 +100,8 @@ public static String encode(final DepthEvidence ev) { columns.add(ev.getContig()); columns.add(Integer.toString(ev.getStart() - 1)); columns.add(Integer.toString(ev.getEnd())); - for (int i = 0; i < numCounts; i++) { - columns.add(Integer.toString(counts[i])); + for ( final int count : counts ) { + columns.add(Integer.toString(count)); } return String.join(COL_DELIMITER, columns); } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/DiscordantPairEvidenceBCICodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/DiscordantPairEvidenceBCICodec.java new file mode 100644 index 00000000000..b53ce896947 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/DiscordantPairEvidenceBCICodec.java @@ -0,0 +1,74 @@ +package org.broadinstitute.hellbender.utils.codecs; + +import htsjdk.samtools.SAMSequenceDictionary; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.sv.DiscordantPairEvidence; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Writer; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.List; + +public class DiscordantPairEvidenceBCICodec extends AbstractBCICodec { + private boolean 
versionChecked = false; + + private static final String PE_BCI_FILE_EXTENSION = ".pe.bci"; + + @Override + public DiscordantPairEvidence decode( final Reader reader ) + throws IOException { + if ( !versionChecked ) { + if ( !DiscordantPairEvidence.BCI_VERSION.equals(reader.getVersion()) ) { + throw new UserException("pe.bci file has wrong version: expected " + + DiscordantPairEvidence.BCI_VERSION + " but found " + reader.getVersion()); + } + versionChecked = true; + } + final DataInputStream dis = reader.getStream(); + final String sample = reader.getSampleNames().get(dis.readInt()); + final SAMSequenceDictionary dict = reader.getDictionary(); + final String startContig = dict.getSequence(dis.readInt()).getSequenceName(); + final int start = dis.readInt(); + final boolean startStrand = dis.readBoolean(); + final String endContig = dict.getSequence(dis.readInt()).getSequenceName(); + final int end = dis.readInt(); + final boolean endStrand = dis.readBoolean(); + return new DiscordantPairEvidence(sample, startContig, start, startStrand, + endContig, end, endStrand); + } + + @Override + public Class getFeatureType() { return DiscordantPairEvidence.class; } + + @Override + public boolean canDecode( final String path ) { + return path.toLowerCase().endsWith(PE_BCI_FILE_EXTENSION); + } + + @Override + public Writer makeSink( final GATKPath path, + final SAMSequenceDictionary dict, + final List sampleNames, + final int compressionLevel ) { + final String className = DiscordantPairEvidence.class.getSimpleName(); + final FeaturesHeader header = + new FeaturesHeader(className, DiscordantPairEvidence.BCI_VERSION, dict, sampleNames); + return new Writer<>(path, header, this::encode, compressionLevel); + } + + @Override + public void encode( final DiscordantPairEvidence peEvidence, + final Writer writer ) throws IOException { + final DataOutputStream dos = writer.getStream(); + dos.writeInt(writer.getSampleIndex(peEvidence.getSample())); + dos.writeInt(writer.getContigIndex(peEvidence.getContig())); + dos.writeInt(peEvidence.getStart()); + dos.writeBoolean(peEvidence.getStartStrand()); + dos.writeInt(writer.getContigIndex(peEvidence.getEndContig())); + dos.writeInt(peEvidence.getEndPosition()); + dos.writeBoolean(peEvidence.getEndStrand()); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/DiscordantPairEvidenceCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/DiscordantPairEvidenceCodec.java index 169ca6f405f..824334c2393 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/DiscordantPairEvidenceCodec.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/DiscordantPairEvidenceCodec.java @@ -1,16 +1,20 @@ package org.broadinstitute.hellbender.utils.codecs; import com.google.common.base.Splitter; +import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.readers.LineIterator; +import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.tools.sv.DiscordantPairEvidence; +import org.broadinstitute.hellbender.utils.io.FeatureOutputStream; import java.util.Arrays; import java.util.List; -public class DiscordantPairEvidenceCodec extends AsciiFeatureCodec { +public class DiscordantPairEvidenceCodec extends AsciiFeatureCodec + implements FeatureOutputCodec> { public static final String FORMAT_SUFFIX = ".pe.txt"; public static final String COL_DELIMITER = "\t"; @@ -43,18
+47,34 @@ public DiscordantPairEvidence decode(final String line) { @Override public boolean canDecode(final String path) { - final String toDecode; - if (IOUtil.hasBlockCompressedExtension(path)) { - toDecode = path.substring(0, path.lastIndexOf(".")); - } else { - toDecode = path; + String toDecode = path.toLowerCase(); + if ( IOUtil.hasBlockCompressedExtension(toDecode) ) { + toDecode = toDecode.substring(0, toDecode.lastIndexOf('.')); } - return toDecode.toLowerCase().endsWith(FORMAT_SUFFIX); + return toDecode.endsWith(FORMAT_SUFFIX); } @Override public Object readActualHeader(final LineIterator reader) { return null; } + @Override + public FeatureOutputStream makeSink( final GATKPath path, + final SAMSequenceDictionary dict, + final List sampleNames, + final int compressionLevel ) { + return new FeatureOutputStream<>(path, + getTabixFormat(), + DiscordantPairEvidenceCodec::encode, + dict, + compressionLevel); + } + + @Override + public void encode( final DiscordantPairEvidence ev, + final FeatureOutputStream os ) { + os.write(ev); + } + public static String encode(final DiscordantPairEvidence ev) { final List columns = Arrays.asList( ev.getContig(), diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/FeatureOutputCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/FeatureOutputCodec.java new file mode 100644 index 00000000000..00ea0e82bef --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/FeatureOutputCodec.java @@ -0,0 +1,15 @@ +package org.broadinstitute.hellbender.utils.codecs; + +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.tribble.Feature; +import org.broadinstitute.hellbender.engine.GATKPath; + +import java.io.IOException; +import java.util.List; + +public interface FeatureOutputCodec> { + boolean canDecode( final String path ); + Class getFeatureType(); + S makeSink( GATKPath path, SAMSequenceDictionary dict, List sampleNames, int compressionLevel ); + void encode( F feature, S sink ) throws IOException; +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/FeatureSink.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/FeatureSink.java new file mode 100644 index 00000000000..313c3c75055 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/FeatureSink.java @@ -0,0 +1,8 @@ +package org.broadinstitute.hellbender.utils.codecs; + +import htsjdk.tribble.Feature; + +public interface FeatureSink { + void write( F feature ); + void close(); +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/FeaturesHeader.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/FeaturesHeader.java new file mode 100644 index 00000000000..765307981cf --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/FeaturesHeader.java @@ -0,0 +1,41 @@ +package org.broadinstitute.hellbender.utils.codecs; + +import htsjdk.samtools.SAMSequenceDictionary; +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.List; + +public class FeaturesHeader { + private final String className; + private final String version; + private final SAMSequenceDictionary dictionary; + private final List sampleNames; + + public FeaturesHeader( final String className, + final String version, + final SAMSequenceDictionary dictionary, + final List sampleNames ) { + Utils.nonNull(className); + Utils.nonNull(version); + this.className = className; + this.version = version; + this.dictionary = dictionary; + this.sampleNames = sampleNames; + } + + 
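// Note: unlike className and version, the dictionary and sampleNames are
+    // deliberately nullable: a text codec (see DepthEvidenceCodec.readActualHeader
+    // above) builds a FeaturesHeader carrying sample names but no dictionary.
+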
public String getClassName() { + return className; + } + + public String getVersion() { + return version; + } + + public SAMSequenceDictionary getDictionary() { + return dictionary; + } + + public List getSampleNames() { + return sampleNames; + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java new file mode 100644 index 00000000000..0d8591359da --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java @@ -0,0 +1,56 @@ +package org.broadinstitute.hellbender.utils.codecs; + +import htsjdk.samtools.SAMSequenceDictionary; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.sv.LocusDepth; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Writer; + +import java.io.IOException; +import java.util.List; + +public class LocusDepthBCICodec extends AbstractBCICodec { + private boolean versionChecked = false; + private SAMSequenceDictionary dict; + private static final String LD_BCI_FILE_EXTENSION = ".ld.bci"; + + @Override + public LocusDepth decode( final Reader reader ) throws IOException { + if ( !versionChecked ) { + if ( !LocusDepth.BCI_VERSION.equals(reader.getVersion()) ) { + throw new UserException("bci file has wrong version: expected " + + LocusDepth.BCI_VERSION + " but found " + reader.getVersion()); + } + versionChecked = true; + } + return new LocusDepth(reader.getStream(), reader.getDictionary()); + } + + @Override + public Class getFeatureType() { return LocusDepth.class; } + + @Override + public boolean canDecode( final String path ) { + return path.toLowerCase().endsWith(LD_BCI_FILE_EXTENSION); + } + + @Override + public Writer makeSink( final GATKPath path, + final SAMSequenceDictionary dict, + final List sampleNames, + final int compressionLevel ) { + this.dict = dict; + final String className = LocusDepth.class.getSimpleName(); + return new Writer<>(path, + new FeaturesHeader(className, LocusDepth.BCI_VERSION, dict, sampleNames), + this::encode, + compressionLevel); + } + + @Override + public void encode( final LocusDepth locusDepth, final Writer writer ) + throws IOException { + locusDepth.write(writer.getStream(), dict); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java new file mode 100644 index 00000000000..2f2c2791235 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java @@ -0,0 +1,76 @@ +package org.broadinstitute.hellbender.utils.codecs; + +import com.google.common.base.Splitter; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.IOUtil; +import htsjdk.tribble.AsciiFeatureCodec; +import htsjdk.tribble.index.tabix.TabixFormat; +import htsjdk.tribble.readers.LineIterator; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.tools.sv.LocusDepth; +import org.broadinstitute.hellbender.utils.io.FeatureOutputStream; + +import java.util.List; + +public class LocusDepthCodec extends AsciiFeatureCodec + implements FeatureOutputCodec> { + public static final String FORMAT_SUFFIX = ".ld.txt"; + private static final Splitter splitter = Splitter.on("\t"); + + public 
LocusDepthCodec() { + super(LocusDepth.class); + } + + @Override public TabixFormat getTabixFormat() { + return new TabixFormat(TabixFormat.ZERO_BASED, 1, 2, 0, '#', 0); + } + + @Override public LocusDepth decode( final String line ) { + final List tokens = splitter.splitToList(line); + if ( tokens.size() != 7 ) { + throw new IllegalArgumentException("Invalid number of columns: " + tokens.size()); + } + return new LocusDepth(tokens.get(0), + Integer.parseUnsignedInt(tokens.get(1)) + 1, + (byte)tokens.get(2).charAt(0), + Integer.parseUnsignedInt(tokens.get(3)), + Integer.parseUnsignedInt(tokens.get(4)), + Integer.parseUnsignedInt(tokens.get(5)), + Integer.parseUnsignedInt(tokens.get(6))); + } + + @Override public Object readActualHeader( LineIterator reader ) { return null; } + + @Override + public boolean canDecode( final String path ) { + String toDecode = path.toLowerCase(); + if ( IOUtil.hasBlockCompressedExtension(toDecode) ) { + toDecode = toDecode.substring(0, toDecode.lastIndexOf('.')); + } + return toDecode.endsWith(FORMAT_SUFFIX); + } + + @Override + public FeatureOutputStream makeSink( final GATKPath path, + final SAMSequenceDictionary dict, + final List sampleNames, + final int compressionLevel ) { + return new FeatureOutputStream<>(path, + getTabixFormat(), + LocusDepthCodec::encode, + dict, + compressionLevel); + } + + @Override + public void encode( final LocusDepth ev, final FeatureOutputStream os ) { + os.write(ev); + } + + public static String encode( final LocusDepth locusDepth ) { + return locusDepth.getContig() + "\t" + (locusDepth.getStart() - 1) + "\t" + + locusDepth.getRefCall() + "\t" + locusDepth.getADepth() + "\t" + + locusDepth.getCDepth() + "\t" + locusDepth.getGDepth() + "\t" + + locusDepth.getTDepth(); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/SplitReadEvidenceBCICodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/SplitReadEvidenceBCICodec.java new file mode 100644 index 00000000000..cc860e8a741 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/SplitReadEvidenceBCICodec.java @@ -0,0 +1,66 @@ +package org.broadinstitute.hellbender.utils.codecs; + +import htsjdk.samtools.SAMSequenceDictionary; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.sv.SplitReadEvidence; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Writer; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.List; + +public class SplitReadEvidenceBCICodec extends AbstractBCICodec { + private boolean versionChecked = false; + private static final String SR_BCI_FILE_EXTENSION = ".sr.bci"; + + @Override + public SplitReadEvidence decode( final Reader reader ) throws IOException { + if ( !versionChecked ) { + if ( !SplitReadEvidence.BCI_VERSION.equals(reader.getVersion()) ) { + throw new UserException("sr.bci file has wrong version: expected " + + SplitReadEvidence.BCI_VERSION + " but found " + reader.getVersion()); + } + versionChecked = true; + } + final DataInputStream dis = reader.getStream(); + final String sample = reader.getSampleNames().get(dis.readInt()); + final String contig = reader.getDictionary().getSequence(dis.readInt()).getSequenceName(); + final int position = dis.readInt(); + final int count = dis.readInt(); + final
boolean strand = dis.readBoolean(); + return new SplitReadEvidence(sample, contig, position, count, strand); + } + + @Override + public Class getFeatureType() { return SplitReadEvidence.class; } + + @Override + public boolean canDecode( final String path ) { + return path.toLowerCase().endsWith(SR_BCI_FILE_EXTENSION); + } + + @Override + public Writer makeSink( final GATKPath path, + final SAMSequenceDictionary dict, + final List sampleNames, + final int compressionLevel ) { + final String className = SplitReadEvidence.class.getSimpleName(); + final FeaturesHeader header = + new FeaturesHeader(className, SplitReadEvidence.BCI_VERSION, dict, sampleNames); + return new Writer<>(path, header, this::encode, compressionLevel); + } + + @Override + public void encode( final SplitReadEvidence srEvidence, + final Writer writer ) throws IOException { + final DataOutputStream dos = writer.getStream(); + dos.writeInt(writer.getSampleIndex(srEvidence.getSample())); + dos.writeInt(writer.getContigIndex(srEvidence.getContig())); + dos.writeInt(srEvidence.getStart()); + dos.writeInt(srEvidence.getCount()); + dos.writeBoolean(srEvidence.getStrand()); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/SplitReadEvidenceCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/SplitReadEvidenceCodec.java index 4e7057f3f4e..e2fe5b5e537 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/SplitReadEvidenceCodec.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/SplitReadEvidenceCodec.java @@ -1,16 +1,20 @@ package org.broadinstitute.hellbender.utils.codecs; import com.google.common.base.Splitter; +import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.readers.LineIterator; +import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.tools.sv.SplitReadEvidence; +import org.broadinstitute.hellbender.utils.io.FeatureOutputStream; import java.util.Arrays; import java.util.List; -public class SplitReadEvidenceCodec extends AsciiFeatureCodec { +public class SplitReadEvidenceCodec extends AsciiFeatureCodec + implements FeatureOutputCodec> { public static final String FORMAT_SUFFIX = ".sr.txt"; public static final String COL_DELIMITER = "\t"; @@ -46,18 +50,34 @@ public SplitReadEvidence decode(final String line) { @Override public boolean canDecode(final String path) { - final String toDecode; - if (IOUtil.hasBlockCompressedExtension(path)) { - toDecode = path.substring(0, path.lastIndexOf(".")); - } else { - toDecode = path; + String toDecode = path.toLowerCase(); + if ( IOUtil.hasBlockCompressedExtension(toDecode) ) { + toDecode = toDecode.substring(0, toDecode.lastIndexOf('.')); } - return toDecode.toLowerCase().endsWith(FORMAT_SUFFIX); + return toDecode.endsWith(FORMAT_SUFFIX); } @Override public Object readActualHeader(final LineIterator reader) { return null; } + @Override + public FeatureOutputStream makeSink( final GATKPath path, + final SAMSequenceDictionary dict, + final List sampleNames, + final int compressionLevel ) { + return new FeatureOutputStream<>(path, + getTabixFormat(), + SplitReadEvidenceCodec::encode, + dict, + compressionLevel); + } + + @Override + public void encode( final SplitReadEvidence ev, + final FeatureOutputStream os ) { + os.write(ev); + } + public static String encode(final SplitReadEvidence ev) { final List columns = Arrays.asList( ev.getContig(), 
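A usage sketch, not code from this PR: the codec changes above all follow one pattern. Each text codec keeps a tabix-indexed FeatureOutputStream sink, each new BCI codec writes the same features to a block-compressed interval stream, and callers reach either one through the shared FeatureOutputCodec/FeatureSink interfaces. The sketch assumes BafEvidenceBCICodec.canDecode() matches a ".baf.bci" suffix the way the other BCI codecs above match theirs; the class name BafSinkSketch, the output path, and compression level 6 (the Writer default) are illustrative assumptions.

import java.util.Arrays;
import java.util.List;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.tools.sv.BafEvidence;
import org.broadinstitute.hellbender.utils.codecs.BafEvidenceBCICodec;
import org.broadinstitute.hellbender.utils.codecs.BafEvidenceCodec;
import org.broadinstitute.hellbender.utils.codecs.FeatureSink;

public class BafSinkSketch { // hypothetical caller, for illustration only
    public static void main( final String[] args ) {
        final SAMSequenceDictionary dict = new SAMSequenceDictionary(
                Arrays.asList(new SAMSequenceRecord("21", 46709983)));
        final List<String> samples = Arrays.asList("A");
        // a ".baf.txt.gz" path would select the tabix-indexed text codec instead
        final GATKPath outPath = new GATKPath("example.baf.bci");

        // choose the codec by output extension; both makeSink() results are FeatureSinks
        final FeatureSink<BafEvidence> sink =
                new BafEvidenceBCICodec().canDecode(outPath.toString())
                        ? new BafEvidenceBCICodec().makeSink(outPath, dict, samples, 6)
                        : new BafEvidenceCodec().makeSink(outPath, dict, samples, 6);

        // features must arrive coordinate-sorted for the on-the-fly indexing to be valid
        sink.write(new BafEvidence("A", "21", 25995506, 0.43333333333333335));
        sink.close();
    }
}

Written this way, the caller never needs to know whether a tabix index or the self-contained block index of BlockCompressedIntervalStream (next file) is being built.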
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/io/BlockCompressedIntervalStream.java b/src/main/java/org/broadinstitute/hellbender/utils/io/BlockCompressedIntervalStream.java new file mode 100644 index 00000000000..9076645602d --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/io/BlockCompressedIntervalStream.java @@ -0,0 +1,612 @@ +package org.broadinstitute.hellbender.utils.io; + +import com.google.common.annotations.VisibleForTesting; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.seekablestream.SeekableBufferedStream; +import htsjdk.samtools.seekablestream.SeekableStream; +import htsjdk.samtools.seekablestream.SeekableStreamFactory; +import htsjdk.samtools.util.BlockCompressedInputStream; +import htsjdk.samtools.util.BlockCompressedOutputStream; +import htsjdk.samtools.util.BlockCompressedStreamConstants; +import htsjdk.tribble.*; +import org.broadinstitute.hellbender.engine.FeatureInput; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.codecs.FeatureSink; +import org.broadinstitute.hellbender.utils.codecs.FeaturesHeader; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Path; +import java.util.*; + +public class BlockCompressedIntervalStream { + + // our final empty block adds to the extra information + // it has a virtual file pointer to the start of the index + public static final byte[] EMPTY_GZIP_BLOCK_WITH_INDEX_POINTER = { + BlockCompressedStreamConstants.GZIP_ID1, + (byte)BlockCompressedStreamConstants.GZIP_ID2, + BlockCompressedStreamConstants.GZIP_CM_DEFLATE, + BlockCompressedStreamConstants.GZIP_FLG, + 0, 0, 0, 0, // modification time + BlockCompressedStreamConstants.GZIP_XFL, + (byte)BlockCompressedStreamConstants.GZIP_OS_UNKNOWN, + BlockCompressedStreamConstants.GZIP_XLEN + 12, 0, + BlockCompressedStreamConstants.BGZF_ID1, + BlockCompressedStreamConstants.BGZF_ID2, + BlockCompressedStreamConstants.BGZF_LEN, 0, + 39, 0, // Total block size - 1 as little-endian short + (byte)'I', (byte)'P', 8, 0, // index pointer extra data + + // 8-byte little-endian long representing file-pointer to beginning of index + // (this data gets overwritten each time a stream is closed) + 1, 2, 3, 4, 5, 6, 7, 8, + + 3, 0, // empty payload + 0, 0, 0, 0, // crc + 0, 0, 0, 0, // uncompressedSize + }; + public static final int FILE_POINTER_OFFSET = 22; + + public static final String BCI_FILE_EXTENSION = ".bci"; + + // each compressed block of data will have (at least) one of these as a part of the index + // for each contig that appears in a compressed block, the SVInterval tracks the smallest + // starting coordinate and largest end coordinate of any object in the block + // the filePosition member is the virtual file offset of the first object in the block (or, the + // first object for a new contig, if there are multiple contigs represented within the block) + public final static class IndexEntry { + final SVInterval interval; + long filePosition; + + public IndexEntry( final SVInterval interval, final long filePosition ) { + this.interval = interval; + this.filePosition = filePosition; + } + + public 
IndexEntry( final DataInputStream dis ) throws IOException { + this.interval = new SVInterval(dis.readInt(), dis.readInt(), dis.readInt()); + this.filePosition = dis.readLong(); + } + + public SVInterval getInterval() { return interval; } + public long getFilePosition() { return filePosition; } + + public void write( final DataOutputStream dos ) throws IOException { + dos.writeInt(interval.getContig()); + dos.writeInt(interval.getStart()); + dos.writeInt(interval.getEnd()); + dos.writeLong(filePosition); + } + } + + @FunctionalInterface + public interface WriteFunc { + void write( F feature, Writer writer ) throws IOException; + } + + // a class for writing arbitrary objects to a block compressed stream with a self-contained index + // the only restriction is that you must supply a lambda that writes enough state to a DataOutputStream + // to allow you to reconstitute the object when you read it back in later + public static class Writer implements FeatureSink { + final String path; + final SAMSequenceDictionary dict; + final Map sampleMap; + final WriteFunc writeFunc; + final OutputStream os; + final BlockCompressedOutputStream bcos; + final DataOutputStream dos; + Feature lastInterval; + final List indexEntries; + long blockFilePosition; + int blockContig; + int blockStart; + int blockEnd; + boolean firstBlockMember; + + public final static int DEFAULT_COMPRESSION_LEVEL = 6; + + public Writer( final GATKPath path, + final FeaturesHeader header, + final WriteFunc writeFunc ) { + this(path, header, writeFunc, DEFAULT_COMPRESSION_LEVEL); + } + + public Writer( final GATKPath path, + final FeaturesHeader header, + final WriteFunc writeFunc, + final int compressionLevel ) { + this.path = path.toString(); + this.dict = header.getDictionary(); + this.sampleMap = createSampleMap(header.getSampleNames()); + this.writeFunc = writeFunc; + this.os = path.getOutputStream(); + this.bcos = new BlockCompressedOutputStream(os, (Path)null, compressionLevel); + this.dos = new DataOutputStream(bcos); + this.lastInterval = null; + this.indexEntries = new ArrayList<>(); + this.firstBlockMember = true; + writeHeader(header); + } + + @VisibleForTesting + public Writer( final String streamSource, + final OutputStream os, + final FeaturesHeader header, + final WriteFunc writeFunc ) { + this.path = streamSource; + this.dict = header.getDictionary(); + this.sampleMap = createSampleMap(header.getSampleNames()); + this.writeFunc = writeFunc; + this.os = os; + this.bcos = new BlockCompressedOutputStream(os, (Path)null, DEFAULT_COMPRESSION_LEVEL); + this.dos = new DataOutputStream(bcos); + this.lastInterval = null; + this.indexEntries = new ArrayList<>(); + this.firstBlockMember = true; + writeHeader(header); + } + + private Map createSampleMap( final List sampleNames ) { + final Map sampleMap = new HashMap<>(sampleNames.size() * 3 / 2); + for ( final String sampleName : sampleNames ) { + sampleMap.put(sampleName, sampleMap.size()); + } + return sampleMap; + } + + private void writeHeader( final FeaturesHeader header ) { + try { + writeClassAndVersion(header.getClassName(), header.getVersion()); + writeSamples(header.getSampleNames()); + writeDictionary(header.getDictionary()); + dos.flush(); + } catch ( final IOException ioe ) { + throw new UserException("can't write header to " + path, ioe); + } + } + + private void writeClassAndVersion( final String className, final String version ) + throws IOException { + dos.writeUTF(className); + dos.writeUTF(version); + } + + private void writeSamples( final List sampleNames ) 
throws IOException { + dos.writeInt(sampleNames.size()); + for ( final String sampleName : sampleNames ) { + dos.writeUTF(sampleName); + } + } + + private void writeDictionary( final SAMSequenceDictionary dictionary ) throws IOException { + dos.writeInt(dictionary.size()); + for ( final SAMSequenceRecord rec : dictionary.getSequences() ) { + dos.writeInt(rec.getSequenceLength()); + dos.writeUTF(rec.getSequenceName()); + } + } + + public DataOutputStream getStream() { return dos; } + + public int getSampleIndex( final String sampleName ) { + final Integer sampleIndex = sampleMap.get(sampleName); + if ( sampleIndex == null ) { + throw new IllegalArgumentException("can't find index for sampleName " + sampleName); + } + return sampleIndex; + } + + public int getContigIndex( final String contigName ) { + final SAMSequenceRecord contig = dict.getSequence(contigName); + if ( contig == null ) { + throw new UserException("can't find contig name " + contigName); + } + return contig.getSequenceIndex(); + } + + @Override + public void write( final F feature ) { + final long prevFilePosition = bcos.getPosition(); + // write the object + try { + writeFunc.write(feature, this); + } catch ( final IOException ioe ) { + throw new UserException("can't write to " + path, ioe); + } + + // if this is the first interval we've seen in a block, just capture the block-start data + if ( firstBlockMember || lastInterval == null ) { + startBlock(prevFilePosition, feature); + return; + } + + // if the contig changes emit a new index entry (for the previous contig) and + // restart tracking of the block + if ( !feature.contigsMatch(lastInterval) ) { + addIndexEntry(); + startBlock(prevFilePosition, feature); + return; + } + + // extend the tracked interval, as necessary + blockEnd = Math.max(blockEnd, feature.getEnd()); + lastInterval = feature; + + // if writing this element caused a new block to be compressed and added to the file + if ( isNewBlock(prevFilePosition, bcos.getPosition()) ) { + addIndexEntry(); + firstBlockMember = true; + } + } + + @Override + public void close() { + // take care of any pending index entry, if necessary + if ( !firstBlockMember ) { + addIndexEntry(); + } + + try { + dos.flush(); // complete the data block + + long indexPosition = bcos.getPosition(); // current position is the start of the index + + // write the index entries + dos.writeInt(indexEntries.size()); + for ( final IndexEntry indexEntry : indexEntries ) { + indexEntry.write(dos); + } + dos.flush(); // and complete the block + + // write a 0-length terminator block at the end that captures the index position + final byte[] emptyBlockWithIndexPointer = + Arrays.copyOf(EMPTY_GZIP_BLOCK_WITH_INDEX_POINTER, + EMPTY_GZIP_BLOCK_WITH_INDEX_POINTER.length); + for ( int idx = FILE_POINTER_OFFSET; idx != FILE_POINTER_OFFSET + 8; ++idx ) { + emptyBlockWithIndexPointer[idx] = (byte)indexPosition; + indexPosition >>>= 8; + } + os.write(emptyBlockWithIndexPointer); + + bcos.close(false); // we've already handled the terminator block + } catch ( final IOException ioe ) { + throw new UserException("unable to add index and close " + path, ioe); + } + } + + private void startBlock( final long filePosition, final Feature interval ) { + blockFilePosition = filePosition; + lastInterval = interval; + blockContig = dict.getSequenceIndex(interval.getContig()); + blockStart = interval.getStart(); + blockEnd = interval.getEnd(); + firstBlockMember = false; + } + + private void addIndexEntry() { + final SVInterval blockInterval = new 
SVInterval(blockContig, blockStart, blockEnd); + indexEntries.add(new IndexEntry(blockInterval, blockFilePosition)); + } + } + + // a class for reading arbitrary objects from a block compressed stream with a self-contained index + // the only restriction is that you must supply a lambda that reads from a DataInputStream + // to reconstitute the object. + public static final class Reader implements FeatureReader { + final String path; + final FeatureCodec> codec; + final long indexFilePointer; + final BlockCompressedInputStream bcis; + final DataInputStream dis; + final FeaturesHeader header; + final long dataFilePointer; + SVIntervalTree index; + boolean usedByIterator; + + public Reader( final FeatureInput inputDescriptor, final FeatureCodec> codec ) { + this.path = inputDescriptor.getRawInputString(); + this.codec = codec; + final SeekableStream ss; + try { + ss = SeekableStreamFactory.getInstance().getStreamFor(path); + } catch ( final IOException ioe ) { + throw new UserException("unable to open " + path, ioe); + } + this.indexFilePointer = findIndexFilePointer(ss); + this.bcis = new BlockCompressedInputStream(new SeekableBufferedStream(ss)); + this.dis = new DataInputStream(bcis); + this.header = readHeader(); + this.dataFilePointer = bcis.getPosition(); // having read header, we're pointing at the data + this.index = null; + this.usedByIterator = false; + final String expectedClassName = codec.getFeatureType().getSimpleName(); + if ( !header.getClassName().equals(expectedClassName) ) { + throw new UserException("can't use " + path + " to read " + expectedClassName + + " features -- it contains " + header.getClassName() + " features"); + } + } + + public Reader( final Reader reader ) { + this.path = reader.path; + this.codec = reader.codec; + this.indexFilePointer = reader.indexFilePointer; + try { + this.bcis = new BlockCompressedInputStream( + new SeekableBufferedStream( + SeekableStreamFactory.getInstance().getStreamFor(path))); + } catch ( final IOException ioe ) { + throw new UserException("unable to clone stream for " + path, ioe); + } + this.dis = new DataInputStream(bcis); + this.header = reader.header; + this.dataFilePointer = reader.dataFilePointer; + this.index = reader.index; + this.usedByIterator = true; + } + + @VisibleForTesting + public Reader( final String inputStreamName, + final SeekableStream ss, + final FeatureCodec> codec ) { + this.path = inputStreamName; + this.codec = codec; + this.indexFilePointer = findIndexFilePointer(ss); + this.bcis = new BlockCompressedInputStream(new SeekableBufferedStream(ss)); + this.dis = new DataInputStream(bcis); + this.header = readHeader(); + this.dataFilePointer = bcis.getPosition(); + this.index = null; + this.usedByIterator = false; + } + + public FeatureCodecHeader getFeatureCodecHeader() { + return new FeatureCodecHeader(header, dataFilePointer); + } + public DataInputStream getStream() { return dis; } + public List getSampleNames() { return header.getSampleNames(); } + public SAMSequenceDictionary getDictionary() { return header.getDictionary(); } + public String getVersion() { return header.getVersion(); } + + public boolean hasNext() { + final long position = bcis.getPosition(); + // A BlockCompressedInputStream returns a 0 position when closed + return position > 0 && position < indexFilePointer; + } + + @Override + public CloseableTribbleIterator query( final String chr, final int start, final int end ) + throws IOException { + if ( index == null ) { + loadIndex(bcis); + } + final SVInterval interval = + new 
SVInterval(getDictionary().getSequenceIndex(chr), start, end); + return new OverlapIterator<>(interval, this); + } + + @Override public CloseableTribbleIterator iterator() { + return new CompleteIterator<>(this); + } + + @Override public void close() { + try { + dis.close(); + } catch ( final IOException ioe ) { + throw new UserException("unable to close " + path, ioe); + } + } + + @Override public List getSequenceNames() { + final SAMSequenceDictionary dict = getDictionary(); + final List names = new ArrayList<>(dict.size()); + for ( final SAMSequenceRecord rec : dict.getSequences() ) { + names.add(rec.getSequenceName()); + } + return names; + } + + @Override public Object getHeader() { return header; } + + @Override public boolean isQueryable() { return true; } + + public long getPosition() { return bcis.getPosition(); } + + public void seekStream( final long filePointer ) { + try { + bcis.seek(filePointer); + } catch ( final IOException ioe ) { + throw new UserException("unable to position stream for " + path, ioe); + } + } + + public T readStream() { + try { + return codec.decode(this); + } catch ( final IOException ioe ) { + throw new GATKException("can't read " + path, ioe); + } + } + + private long findIndexFilePointer( final SeekableStream ss ) { + final int finalBlockLen = EMPTY_GZIP_BLOCK_WITH_INDEX_POINTER.length; + final byte[] finalBlock = new byte[finalBlockLen]; + try { + ss.seek(ss.length() - finalBlockLen); + ss.readFully(finalBlock); + ss.seek(0); + } catch ( final IOException ioe ) { + throw new UserException("unable to read final bgzip block from " + path, ioe); + } + for ( int idx = 0; idx != FILE_POINTER_OFFSET; ++idx ) { + if ( EMPTY_GZIP_BLOCK_WITH_INDEX_POINTER[idx] != finalBlock[idx] ) { + throw new UserException( + "unable to recover index pointer from final block of " + path); + } + } + for ( int idx = FILE_POINTER_OFFSET + 8; idx != finalBlockLen; ++idx ) { + if ( EMPTY_GZIP_BLOCK_WITH_INDEX_POINTER[idx] != finalBlock[idx] ) { + throw new UserException( + "unable to recover index pointer from final block of " + path); + } + } + long indexFilePointer = 0; + int idx = FILE_POINTER_OFFSET + 8; + while ( --idx >= FILE_POINTER_OFFSET ) { + indexFilePointer <<= 8; + indexFilePointer |= finalBlock[idx] & 0xFFL; + } + return indexFilePointer; + } + + private FeaturesHeader readHeader() { + try { + final String className = dis.readUTF(); + final String version = dis.readUTF(); + final List sampleNames = readSampleNames(dis); + final SAMSequenceDictionary dictionary = readDictionary(dis); + return new FeaturesHeader(className, version, dictionary, sampleNames); + } catch ( final IOException ioe ) { + throw new UserException("can't read header from " + path, ioe); + } + } + + private List readSampleNames( final DataInputStream dis ) throws IOException { + final int nSamples = dis.readInt(); + final List sampleNames = new ArrayList<>(nSamples); + for ( int sampleId = 0; sampleId < nSamples; ++sampleId ) { + sampleNames.add(dis.readUTF()); + } + return sampleNames; + } + + private SAMSequenceDictionary readDictionary( final DataInputStream dis ) throws IOException { + final int nRecs = dis.readInt(); + final List seqRecs = new ArrayList<>(nRecs); + for ( int idx = 0; idx != nRecs; ++idx ) { + final int contigSize = dis.readInt(); + final String contigName = dis.readUTF(); + seqRecs.add(new SAMSequenceRecord(contigName, contigSize)); + } + return new SAMSequenceDictionary(seqRecs); + } + + private void loadIndex( final BlockCompressedInputStream bcis ) { + final 
SVIntervalTree intervalTree = new SVIntervalTree<>(); + try { + bcis.seek(indexFilePointer); + final DataInputStream dis = new DataInputStream(bcis); + int nEntries = dis.readInt(); + while ( nEntries-- > 0 ) { + final IndexEntry entry = new IndexEntry(dis); + intervalTree.put(entry.getInterval(), entry.getFilePosition()); + } + bcis.seek(dataFilePointer); + } catch ( final IOException ioe ) { + throw new UserException("unable to read index from " + path, ioe); + } + index = intervalTree; + } + + private Reader getReaderForIterator() { + if ( !usedByIterator ) { + usedByIterator = true; + return this; + } + return new Reader<>(this); + } + + private static class CompleteIterator + implements CloseableTribbleIterator { + final Reader reader; + public CompleteIterator( final Reader reader ) { + this.reader = reader.getReaderForIterator(); + this.reader.seekStream(reader.dataFilePointer); + } + + @Override public Iterator iterator() { + return new CompleteIterator<>(reader); + } + + @Override public boolean hasNext() { + return reader.hasNext(); + } + + @Override public T next() { + if ( !hasNext() ) { + throw new NoSuchElementException("feature iterator has no next element"); + } + return reader.readStream(); + } + + @Override public void close() { reader.close(); } + } + // find all the objects in the stream, inflating just those blocks that might have relevant objects + private static class OverlapIterator + implements CloseableTribbleIterator { + final SVInterval interval; + final Reader reader; + final Iterator> indexEntryIterator; + long blockStartPosition; + T nextT; + + public OverlapIterator( final SVInterval interval, final Reader reader ) { + this.interval = interval; + this.reader = reader.getReaderForIterator(); + this.indexEntryIterator = reader.index.overlappers(interval); + this.blockStartPosition = -1; + advance(); + } + + @Override public boolean hasNext() { return nextT != null; } + + @Override public T next() { + final T result = nextT; + if ( result == null ) { + throw new NoSuchElementException("overlapper iterator has no next element"); + } + advance(); + return result; } + + @Override public void close() { reader.close(); nextT = null; } + + @Override public CloseableTribbleIterator iterator() { + return new OverlapIterator<>(interval, reader); + } + + private void advance() { + do { + if ( isNewBlock(blockStartPosition, reader.getPosition()) ) { + if ( !indexEntryIterator.hasNext() ) { + nextT = null; + return; + } + blockStartPosition = indexEntryIterator.next().getValue(); + reader.seekStream(blockStartPosition); + } + nextT = reader.readStream(); + final int nextContigId = + reader.getDictionary().getSequenceIndex(nextT.getContig()); + if ( interval.getContig() != nextContigId || + interval.getEnd() < nextT.getStart() ) { + nextT = null; + return; + } + } while ( nextT.getEnd() < interval.getStart() ); + } + } + } + + public static boolean isNewBlock( final long filePosition1, final long filePosition2 ) { + // upper 48 bits contain the block offset + // check to see if there are any bit differences in those upper 48 bits + return ((filePosition1 ^ filePosition2) & ~0xffffL) != 0; + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStream.java b/src/main/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStream.java index 9bafa5ef162..c3bed39c854 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStream.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStream.java @@ -4,16 +4,19
@@ import htsjdk.samtools.util.BlockCompressedOutputStream; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.PositionalOutputStream; +import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.Feature; import htsjdk.tribble.FeatureCodec; import htsjdk.tribble.index.Index; import htsjdk.tribble.index.IndexCreator; +import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.index.tabix.TabixIndexCreator; import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.codecs.FeatureSink; +import org.broadinstitute.hellbender.utils.codecs.FeaturesHeader; -import java.io.Closeable; import java.io.IOException; import java.nio.file.Path; import java.util.function.Function; @@ -22,38 +25,39 @@ * Class for output streams that encode Tribble {@link Feature}s. Supports block-compressed output, which is * detected by the output file path extension, in which case a Tabix index is also generated. */ -public class FeatureOutputStream implements Closeable { +public class FeatureOutputStream implements FeatureSink { private static final String NEWLINE_CHARACTER = "\n"; + private final Function encoder; private final PositionalOutputStream outputStream; private final IndexCreator indexCreator; - private final Function encoder; private final Path featurePath; /** * @param file file to write to - * @param codec feature codec - * @param encoder encoding function mapping a feature to its text record (whithout a newline character) - * @param dictionary sequence dictionary - * @param compressionLevel block compression level (ignored if file path does not have a block-compressed extension) + * @param tabixFormat column descriptions for the tabix index + * @param header metaData for the features */ - public FeatureOutputStream(final GATKPath file, final FeatureCodec codec, - final Function encoder, final SAMSequenceDictionary dictionary, - final int compressionLevel) { + public FeatureOutputStream( final GATKPath file, + final TabixFormat tabixFormat, + final Function encoder, + final SAMSequenceDictionary dict, + final int compressionLevel ) { Utils.nonNull(file); - Utils.nonNull(codec); + Utils.nonNull(tabixFormat); Utils.nonNull(encoder); - Utils.nonNull(dictionary); + Utils.nonNull(dict); + this.encoder = encoder; if (IOUtil.hasBlockCompressedExtension(file.toPath())) { - outputStream = new PositionalOutputStream(new BlockCompressedOutputStream(file.toString(), compressionLevel)); - indexCreator = new TabixIndexCreator(dictionary, codec.getTabixFormat()); + outputStream = new PositionalOutputStream( + new BlockCompressedOutputStream(file.toString(), compressionLevel)); + indexCreator = new TabixIndexCreator(dict, tabixFormat); } else { outputStream = new PositionalOutputStream(file.getOutputStream()); indexCreator = null; } featurePath = file.toPath(); - this.encoder = encoder; } /** @@ -75,7 +79,8 @@ public void writeHeader(final String header) { * * @param feature feature to write, cannot be null */ - public void add(final F feature) { + @Override + public void write(final F feature) { Utils.nonNull(feature); if (indexCreator != null) { indexCreator.addFeature(feature, outputStream.getPosition()); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java index 
cecb4d44268..ff2d2d79a95 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java @@ -23,8 +23,8 @@ import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata; import org.broadinstitute.hellbender.tools.spark.sv.integration.SVIntegrationTestDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.utils.PairedStrandedIntervalTree; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.tools.spark.sv.utils.StrandedInterval; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.mockito.Mockito; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ImpreciseVariantDetectorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ImpreciseVariantDetectorUnitTest.java index 8bf33b13f10..1daae1d7f99 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ImpreciseVariantDetectorUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ImpreciseVariantDetectorUnitTest.java @@ -15,7 +15,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata; import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; import org.broadinstitute.hellbender.tools.spark.sv.utils.PairedStrandedIntervalTree; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.StrandedInterval; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.mockito.Mockito; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointDensityFilterTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointDensityFilterTest.java index 9c9cb34d663..162706067f4 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointDensityFilterTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointDensityFilterTest.java @@ -1,7 +1,7 @@ package org.broadinstitute.hellbender.tools.spark.sv.evidence; import htsjdk.samtools.SAMFileHeader; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.utils.FlatMapGluer; import org.broadinstitute.hellbender.utils.IntHistogramTest; import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidenceTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidenceTest.java index 0f3ae15992d..3d270ee83ca 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidenceTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/BreakpointEvidenceTest.java @@ -4,7 +4,7 @@ import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; import htsjdk.samtools.SAMFileHeader; 
-import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.StrandedInterval; import org.broadinstitute.hellbender.utils.IntHistogramTest; import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLinkClustererTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLinkClustererTest.java index 2be267510af..56143748e0c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLinkClustererTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/EvidenceTargetLinkClustererTest.java @@ -2,7 +2,7 @@ import htsjdk.samtools.SAMFileHeader; import org.broadinstitute.hellbender.GATKBaseTest; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.StrandedInterval; import org.broadinstitute.hellbender.tools.spark.utils.IntHistogram; import org.broadinstitute.hellbender.utils.IntHistogramTest; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBreakpointEvidenceSparkUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBreakpointEvidenceSparkUnitTest.java index 232cefd3b1e..0aaee83deb6 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBreakpointEvidenceSparkUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBreakpointEvidenceSparkUnitTest.java @@ -13,6 +13,8 @@ import org.broadinstitute.hellbender.tools.spark.utils.HopscotchUniqueMultiMapSpark; import org.broadinstitute.hellbender.tools.spark.utils.IntHistogram; import org.broadinstitute.hellbender.utils.IntHistogramTest; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.GATKBaseTest; import org.testng.Assert; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/IntervalCoverageFinderUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/IntervalCoverageFinderUnitTest.java index 48e3df1e931..b67b94e74aa 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/IntervalCoverageFinderUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/IntervalCoverageFinderUnitTest.java @@ -3,7 +3,7 @@ import htsjdk.samtools.SAMFileHeader; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.utils.IntHistogramTest; import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/QNameFinderTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/QNameFinderTest.java index 201219a4e69..367df23fc1e 100644 --- 
a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/QNameFinderTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/QNameFinderTest.java @@ -3,12 +3,11 @@ import htsjdk.samtools.SAMFileHeader; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; +import org.broadinstitute.hellbender.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.IntHistogramTest; import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; -import org.broadinstitute.hellbender.utils.read.ReadUtils; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/XGBoostEvidenceFilterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/XGBoostEvidenceFilterUnitTest.java index 29525b9a12c..6f25766b57c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/XGBoostEvidenceFilterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/evidence/XGBoostEvidenceFilterUnitTest.java @@ -13,7 +13,7 @@ import org.broadinstitute.hellbender.engine.spark.SparkContextFactory; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.spark.sv.discovery.TestUtilsForAssemblyBasedSVDiscovery; -import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.StrandedInterval; import org.broadinstitute.hellbender.tools.spark.utils.IntHistogram; import org.broadinstitute.hellbender.utils.Utils; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/PairedStrandedIntervalTreeTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/PairedStrandedIntervalTreeTest.java index ae2fb245ae1..391db19a6da 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/PairedStrandedIntervalTreeTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/PairedStrandedIntervalTreeTest.java @@ -1,6 +1,7 @@ package org.broadinstitute.hellbender.tools.spark.sv.utils; import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.utils.SVInterval; import org.testng.Assert; import org.testng.annotations.Test; import scala.Tuple2; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/BafEvidenceUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/BafEvidenceUnitTest.java new file mode 100644 index 00000000000..e22d3d4a57e --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/BafEvidenceUnitTest.java @@ -0,0 +1,77 @@ +package org.broadinstitute.hellbender.tools.sv; + +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.seekablestream.ByteArraySeekableStream; +import org.broadinstitute.hellbender.utils.codecs.BafEvidenceBCICodec; +import org.broadinstitute.hellbender.utils.codecs.BafEvidenceCodec; +import org.broadinstitute.hellbender.utils.codecs.FeaturesHeader; +import 
org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader; +import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Writer; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.ByteArrayOutputStream; +import java.util.ArrayList; +import java.util.List; + +public class BafEvidenceUnitTest { + private static final SAMSequenceDictionary dict = new SAMSequenceDictionary(); + private static final List samples = new ArrayList<>(3); + private static final List bafs = new ArrayList<>(18); + static { + dict.addSequence(new SAMSequenceRecord("21", 46709983)); + dict.addSequence(new SAMSequenceRecord("22", 50818468)); + samples.add("A"); + samples.add("B"); + samples.add("C"); + bafs.add(new BafEvidence("B","21",25993443,0.5185185185185185)); + bafs.add(new BafEvidence("A","21",25995506,0.43333333333333335)); + bafs.add(new BafEvidence("A","21",25996383,0.46875)); + bafs.add(new BafEvidence("A","21",25996446,0.6)); + bafs.add(new BafEvidence("A","21",25996679,0.5263157894736842)); + bafs.add(new BafEvidence("A","21",25996772,0.3751515151515151)); + bafs.add(new BafEvidence("A","21",25996907,0.5660377358490566)); + bafs.add(new BafEvidence("A","21",25997040,0.4753753753753754)); + bafs.add(new BafEvidence("A","21",25997094,0.518984337921215)); + bafs.add(new BafEvidence("B","22",30000353,0.5666666666666667)); + bafs.add(new BafEvidence("B","22",30003011,0.5555555555555556)); + bafs.add(new BafEvidence("B","22",30004712,0.5526315789473685)); + bafs.add(new BafEvidence("B","22",30005667,0.5925925925925926)); + bafs.add(new BafEvidence("B","22",30007780,0.36363636363636365)); + bafs.add(new BafEvidence("C","22",30010391,0.514162077104642)); + bafs.add(new BafEvidence("B","22",30012721,0.34782608695652173)); + bafs.add(new BafEvidence("C","22",30012825,0.6266666666666667)); + bafs.add(new BafEvidence("B","22",30016476,0.18181818181818182)); + } + + @Test + public void testTextRoundTrip() { + final BafEvidenceCodec codec = new BafEvidenceCodec(); + for ( final BafEvidence be : bafs ) { + Assert.assertEquals(codec.decode(BafEvidenceCodec.encode(be)), be); + } + } + + @Test + public void testBinaryRoundTrip() { + final BafEvidenceBCICodec codec = new BafEvidenceBCICodec(); + final ByteArrayOutputStream os = new ByteArrayOutputStream(1024); + final FeaturesHeader header = + new FeaturesHeader(BafEvidence.class.getSimpleName(), BafEvidence.BCI_VERSION, dict, samples); + final Writer writer = + new Writer<>("in-memory stream", os, header, codec::encode); + for ( final BafEvidence be : bafs ) { + writer.write(be); + } + writer.close(); + final ByteArraySeekableStream ss = new ByteArraySeekableStream(os.toByteArray()); + final Reader reader = + new Reader<>("in-memory stream", ss, codec); + final List recoveredBafs = new ArrayList<>(18); + while ( reader.hasNext() ) { + recoveredBafs.add(reader.readStream()); + } + Assert.assertEquals(recoveredBafs, bafs); + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/DepthEvidenceUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/DepthEvidenceUnitTest.java new file mode 100644 index 00000000000..a8f50ed9e50 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/DepthEvidenceUnitTest.java @@ -0,0 +1,77 @@ +package org.broadinstitute.hellbender.tools.sv; + +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.seekablestream.ByteArraySeekableStream; +import 
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/DepthEvidenceUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/DepthEvidenceUnitTest.java
new file mode 100644
index 00000000000..a8f50ed9e50
--- /dev/null
+++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/DepthEvidenceUnitTest.java
@@ -0,0 +1,77 @@
+package org.broadinstitute.hellbender.tools.sv;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.seekablestream.ByteArraySeekableStream;
+import org.broadinstitute.hellbender.utils.codecs.DepthEvidenceBCICodec;
+import org.broadinstitute.hellbender.utils.codecs.DepthEvidenceCodec;
+import org.broadinstitute.hellbender.utils.codecs.FeaturesHeader;
+import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader;
+import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Writer;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+public class DepthEvidenceUnitTest {
+    private static final SAMSequenceDictionary dict = new SAMSequenceDictionary();
+    private static final List<String> samples = new ArrayList<>(3);
+    private static final List<DepthEvidence> depths = new ArrayList<>(18);
+    static {
+        dict.addSequence(new SAMSequenceRecord("21", 46709983));
+        dict.addSequence(new SAMSequenceRecord("22", 50818468));
+        samples.add("A");
+        samples.add("B");
+        samples.add("C");
+        depths.add(new DepthEvidence("21",25999208,25999308,new int[]{15,23,18}));
+        depths.add(new DepthEvidence("21",25999308,25999408,new int[]{22,22,32}));
+        depths.add(new DepthEvidence("21",25999408,25999508,new int[]{28,18,19}));
+        depths.add(new DepthEvidence("21",25999508,25999608,new int[]{17,20,29}));
+        depths.add(new DepthEvidence("21",25999608,25999708,new int[]{22,17,24}));
+        depths.add(new DepthEvidence("21",25999708,25999808,new int[]{26,27,26}));
+        depths.add(new DepthEvidence("21",25999808,25999908,new int[]{22,20,22}));
+        depths.add(new DepthEvidence("21",25999908,26000008,new int[]{24,20,29}));
+        depths.add(new DepthEvidence("22",29999964,30000064,new int[]{29,20,33}));
+        depths.add(new DepthEvidence("22",30000064,30000164,new int[]{25,18,25}));
+        depths.add(new DepthEvidence("22",30000164,30000264,new int[]{21,21,23}));
+        depths.add(new DepthEvidence("22",30000264,30000364,new int[]{35,22,32}));
+        depths.add(new DepthEvidence("22",30000364,30000464,new int[]{58,52,51}));
+        depths.add(new DepthEvidence("22",30000464,30000564,new int[]{35,35,32}));
+        depths.add(new DepthEvidence("22",30000564,30000664,new int[]{20,15,16}));
+        depths.add(new DepthEvidence("22",30000664,30000764,new int[]{36,22,26}));
+        depths.add(new DepthEvidence("22",30000764,30000864,new int[]{23,20,25}));
+        depths.add(new DepthEvidence("22",30000864,30000964,new int[]{16,18,30}));
+    }
+
+    @Test
+    public void testTextRoundTrip() {
+        final DepthEvidenceCodec codec = new DepthEvidenceCodec();
+        for ( final DepthEvidence de : depths ) {
+            Assert.assertEquals(codec.decode(DepthEvidenceCodec.encode(de)), de);
+        }
+    }
+
+    @Test
+    public void testBinaryRoundTrip() {
+        final DepthEvidenceBCICodec codec = new DepthEvidenceBCICodec();
+        final ByteArrayOutputStream os = new ByteArrayOutputStream(1024);
+        final FeaturesHeader header =
+                new FeaturesHeader(DepthEvidence.class.getSimpleName(), DepthEvidence.BCI_VERSION, dict, samples);
+        final Writer<DepthEvidence> writer =
+                new Writer<>("in-memory stream", os, header, codec::encode);
+        for ( final DepthEvidence de : depths ) {
+            writer.write(de);
+        }
+        writer.close();
+        final ByteArraySeekableStream ss = new ByteArraySeekableStream(os.toByteArray());
+        final Reader<DepthEvidence> reader =
+                new Reader<>("in-memory stream", ss, codec);
+        final List<DepthEvidence> recoveredDepths = new ArrayList<>(18);
+        while ( reader.hasNext() ) {
+            recoveredDepths.add(reader.readStream());
+        }
+        Assert.assertEquals(recoveredDepths, depths);
+    }
+}
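
Unlike the other evidence types, a DepthEvidence record is per-interval rather than per-sample: each row carries one count per sample, positionally aligned with the header's sample list (three samples, int[3] arrays above). A hypothetical helper making that implicit contract explicit; getCounts() is assumed to be the accessor for the count array:

    // assumed imports: java.util.LinkedHashMap, java.util.List, java.util.Map
    static Map<String, Integer> countsBySample( final DepthEvidence evidence,
                                                final List<String> samples ) {
        final int[] values = evidence.getCounts();                 // assumed accessor
        final Map<String, Integer> counts = new LinkedHashMap<>(); // preserves sample order
        for ( int idx = 0; idx != samples.size(); ++idx ) {
            counts.put(samples.get(idx), values[idx]);
        }
        return counts;
    }
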
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/DiscordantPairEvidenceUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/DiscordantPairEvidenceUnitTest.java
new file mode 100644
index 00000000000..d17549a9657
--- /dev/null
+++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/DiscordantPairEvidenceUnitTest.java
@@ -0,0 +1,78 @@
+package org.broadinstitute.hellbender.tools.sv;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.seekablestream.ByteArraySeekableStream;
+import org.broadinstitute.hellbender.utils.codecs.DiscordantPairEvidenceBCICodec;
+import org.broadinstitute.hellbender.utils.codecs.DiscordantPairEvidenceCodec;
+import org.broadinstitute.hellbender.utils.codecs.FeaturesHeader;
+import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader;
+import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Writer;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+public class DiscordantPairEvidenceUnitTest {
+    private static final SAMSequenceDictionary dict = new SAMSequenceDictionary();
+    private static final List<String> samples = new ArrayList<>(3);
+    private static final List<DiscordantPairEvidence> pairs = new ArrayList<>(18);
+    static {
+        dict.addSequence(new SAMSequenceRecord("21", 46709983));
+        dict.addSequence(new SAMSequenceRecord("22", 50818468));
+        dict.addSequence(new SAMSequenceRecord("X", 156040895));
+        samples.add("A");
+        samples.add("B");
+        samples.add("C");
+        pairs.add(new DiscordantPairEvidence("C","21",25972981,false,"X",123051950,true));
+        pairs.add(new DiscordantPairEvidence("C","21",25974416,false,"X",433179,false));
+        pairs.add(new DiscordantPairEvidence("B","21",25974526,true,"X",7510256,false));
+        pairs.add(new DiscordantPairEvidence("A","21",25978689,false,"X",23061675,true));
+        pairs.add(new DiscordantPairEvidence("C","21",25980279,true,"X",118908694,true));
+        pairs.add(new DiscordantPairEvidence("C","21",25991097,false,"X",19552859,true));
+        pairs.add(new DiscordantPairEvidence("B","21",25996526,true,"21",25997312,false));
+        pairs.add(new DiscordantPairEvidence("B","21",25998677,true,"21",25999518,false));
+        pairs.add(new DiscordantPairEvidence("A","21",25999457,true,"21",26000320,false));
+        pairs.add(new DiscordantPairEvidence("B","22",30000459,true,"X",112737301,false));
+        pairs.add(new DiscordantPairEvidence("A","22",30000461,false,"X",70214634,false));
+        pairs.add(new DiscordantPairEvidence("C","22",30000464,true,"X",1113557,false));
+        pairs.add(new DiscordantPairEvidence("B","22",30000670,true,"22",30001447,false));
+        pairs.add(new DiscordantPairEvidence("C","22",30000827,false,"X",100936820,false));
+        pairs.add(new DiscordantPairEvidence("A","22",30003590,false,"X",10293,false));
+        pairs.add(new DiscordantPairEvidence("A","22",30006140,true,"22",30007026,false));
+        pairs.add(new DiscordantPairEvidence("B","22",30006209,false,"X",116263582,false));
+        pairs.add(new DiscordantPairEvidence("C","22",30009296,false,"X",141138844,true));
+    }
+
+    @Test
+    public void testTextRoundTrip() {
+        final DiscordantPairEvidenceCodec codec = new DiscordantPairEvidenceCodec();
+        for ( final DiscordantPairEvidence pair : pairs ) {
+            Assert.assertEquals(codec.decode(DiscordantPairEvidenceCodec.encode(pair)), pair);
+        }
+    }
+
+    @Test
+    public void testBinaryRoundTrip() {
+        final DiscordantPairEvidenceBCICodec codec = new DiscordantPairEvidenceBCICodec();
+        final ByteArrayOutputStream os = new ByteArrayOutputStream(1024);
+        final FeaturesHeader header =
+                new FeaturesHeader(DiscordantPairEvidence.class.getSimpleName(),
+                                   DiscordantPairEvidence.BCI_VERSION, dict, samples);
+        final Writer<DiscordantPairEvidence> writer =
+                new Writer<>("in-memory stream", os, header, codec::encode);
+        for ( final DiscordantPairEvidence be : pairs ) {
+            writer.write(be);
+        }
+        writer.close();
+        final ByteArraySeekableStream ss = new ByteArraySeekableStream(os.toByteArray());
+        final Reader<DiscordantPairEvidence> reader =
+                new Reader<>("in-memory stream", ss, codec);
+        final List<DiscordantPairEvidence> recoveredDiscordantPairs = new ArrayList<>(18);
+        while ( reader.hasNext() ) {
+            recoveredDiscordantPairs.add(reader.readStream());
+        }
+        Assert.assertEquals(recoveredDiscordantPairs, pairs);
+    }
+}

diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/SplitReadEvidenceUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/SplitReadEvidenceUnitTest.java
new file mode 100644
index 00000000000..d832b7fd7d7
--- /dev/null
+++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/SplitReadEvidenceUnitTest.java
@@ -0,0 +1,77 @@
+package org.broadinstitute.hellbender.tools.sv;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.seekablestream.ByteArraySeekableStream;
+import org.broadinstitute.hellbender.utils.codecs.SplitReadEvidenceBCICodec;
+import org.broadinstitute.hellbender.utils.codecs.SplitReadEvidenceCodec;
+import org.broadinstitute.hellbender.utils.codecs.FeaturesHeader;
+import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader;
+import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Writer;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+public class SplitReadEvidenceUnitTest {
+    private static final SAMSequenceDictionary dict = new SAMSequenceDictionary();
+    private static final List<String> samples = new ArrayList<>(3);
+    private static final List<SplitReadEvidence> splits = new ArrayList<>(18);
+    static {
+        dict.addSequence(new SAMSequenceRecord("21", 46709983));
+        dict.addSequence(new SAMSequenceRecord("22", 50818468));
+        samples.add("A");
+        samples.add("B");
+        samples.add("C");
+        splits.add(new SplitReadEvidence("B","21",25998421,1,true));
+        splits.add(new SplitReadEvidence("B","21",25998456,1,true));
+        splits.add(new SplitReadEvidence("B","21",25998529,1,true));
+        splits.add(new SplitReadEvidence("B","21",25998668,1,false));
+        splits.add(new SplitReadEvidence("B","21",25998825,1,true));
+        splits.add(new SplitReadEvidence("C","21",25999220,1,false));
+        splits.add(new SplitReadEvidence("B","21",25999674,1,false));
+        splits.add(new SplitReadEvidence("B","21",25999957,1,false));
+        splits.add(new SplitReadEvidence("C","22",30000009,1,false));
+        splits.add(new SplitReadEvidence("B","22",30000115,1,false));
+        splits.add(new SplitReadEvidence("B","22",30000125,1,false));
+        splits.add(new SplitReadEvidence("A","22",30000133,1,false));
+        splits.add(new SplitReadEvidence("B","22",30000137,1,false));
+        splits.add(new SplitReadEvidence("B","22",30000189,1,false));
+        splits.add(new SplitReadEvidence("B","22",30000192,1,false));
+        splits.add(new SplitReadEvidence("A","22",30000196,1,false));
+        splits.add(new SplitReadEvidence("B","22",30000335,1,false));
+        splits.add(new SplitReadEvidence("A","22",30000338,1,false));
+    }
+
+    @Test
+    public void testTextRoundTrip() {
+        final SplitReadEvidenceCodec codec = new SplitReadEvidenceCodec();
+        for ( final SplitReadEvidence sr : splits ) {
+            Assert.assertEquals(codec.decode(SplitReadEvidenceCodec.encode(sr)), sr);
+        }
+    }
+
+    @Test
+    public void testBinaryRoundTrip() {
+        final SplitReadEvidenceBCICodec codec = new SplitReadEvidenceBCICodec();
+        final ByteArrayOutputStream os = new ByteArrayOutputStream(1024);
+        final FeaturesHeader header =
+                new FeaturesHeader(SplitReadEvidence.class.getSimpleName(), SplitReadEvidence.BCI_VERSION, dict, samples);
+        final Writer<SplitReadEvidence> writer =
+                new Writer<>("in-memory stream", os, header, codec::encode);
+        for ( final SplitReadEvidence sr : splits ) {
+            writer.write(sr);
+        }
+        writer.close();
+        final ByteArraySeekableStream ss = new ByteArraySeekableStream(os.toByteArray());
+        final Reader<SplitReadEvidence> reader =
+                new Reader<>("in-memory stream", ss, codec);
+        final List<SplitReadEvidence> recoveredSplitReads = new ArrayList<>(18);
+        while ( reader.hasNext() ) {
+            recoveredSplitReads.add(reader.readStream());
+        }
+        Assert.assertEquals(recoveredSplitReads, splits);
+    }
+}

diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/PairedEndAndSplitReadEvidenceCollectionIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidenceIntegrationTest.java
similarity index 52%
rename from src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/PairedEndAndSplitReadEvidenceCollectionIntegrationTest.java
rename to src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidenceIntegrationTest.java
index acb42aec13a..433f3ca0cf3 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/PairedEndAndSplitReadEvidenceCollectionIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidenceIntegrationTest.java
@@ -7,8 +7,9 @@
 import org.testng.annotations.Test;

 import java.util.Arrays;
+import java.util.Collections;

-public class PairedEndAndSplitReadEvidenceCollectionIntegrationTest extends CommandLineProgramTest {
+public class CollectSVEvidenceIntegrationTest extends CommandLineProgramTest {

     public static final String pesrTestDir = toolsTestDir + "walkers/sv/pesr";

@@ -16,10 +17,16 @@ public class PairedEndAndSplitReadEvidenceCollectionIntegrationTest extends Comm
     public void testPESRCollection() throws Exception {
         // these test files were generated by svtk collect-pesr
         IntegrationTestSpec spec = new IntegrationTestSpec(
-                "-I " + NA12878_20_21_WGS_bam + " --sample-name NA12878 -PE %s -SR %s",
-                Arrays.asList(pesrTestDir + "/NA12878" + DiscordantPairEvidenceCodec.FORMAT_SUFFIX + ".gz",
-                        pesrTestDir + "/NA12878" + SplitReadEvidenceCodec.FORMAT_SUFFIX + ".gz"));
+                "-I " + NA12878_20_21_WGS_bam + " --sample-name NA12878 -PE %s",
+                Collections.singletonList(pesrTestDir + "/NA12878" + DiscordantPairEvidenceCodec.FORMAT_SUFFIX + ".gz"));
+        spec.setOutputFileExtension(DiscordantPairEvidenceCodec.FORMAT_SUFFIX + ".gz");
         spec.executeTest("base PESR collection", this);
+
+        IntegrationTestSpec spec2 = new IntegrationTestSpec(
+                "-I " + NA12878_20_21_WGS_bam + " --sample-name NA12878 -SR %s",
+                Collections.singletonList(pesrTestDir + "/NA12878" + SplitReadEvidenceCodec.FORMAT_SUFFIX + ".gz"));
+        spec2.setOutputFileExtension(SplitReadEvidenceCodec.FORMAT_SUFFIX + ".gz");
+        spec2.executeTest("base PESR collection", this);

     }
 }
\ No newline at end of file
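
The split into two specs matches how IntegrationTestSpec binds outputs: each %s placeholder is filled with one fresh output file, and setOutputFileExtension applies to that single output, so PE and SR collection are now validated in separate runs. Spelled out, the two specs exercise tool arguments of the following form (output paths hypothetical; suffixes come from the codecs' FORMAT_SUFFIX constants):

    // sketch of the effective argument strings for the two specs above
    final String peArgs = "-I " + NA12878_20_21_WGS_bam
            + " --sample-name NA12878 -PE /tmp/pe-out" + DiscordantPairEvidenceCodec.FORMAT_SUFFIX + ".gz";
    final String srArgs = "-I " + NA12878_20_21_WGS_bam
            + " --sample-name NA12878 -SR /tmp/sr-out" + SplitReadEvidenceCodec.FORMAT_SUFFIX + ".gz";
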
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/PairedEndAndSplitReadEvidenceCollectionUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidenceUnitTest.java
similarity index 71%
rename from src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/PairedEndAndSplitReadEvidenceCollectionUnitTest.java
rename to src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidenceUnitTest.java
index 9e16f85fee6..e03b95e03ad 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/PairedEndAndSplitReadEvidenceCollectionUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidenceUnitTest.java
@@ -5,6 +5,7 @@
 import org.broadinstitute.hellbender.GATKBaseTest;
 import org.broadinstitute.hellbender.engine.GATKPath;
 import org.broadinstitute.hellbender.tools.sv.SplitReadEvidence;
+import org.broadinstitute.hellbender.tools.walkers.sv.CollectSVEvidence.*;
 import org.broadinstitute.hellbender.utils.codecs.SplitReadEvidenceCodec;
 import org.broadinstitute.hellbender.utils.io.FeatureOutputStream;
 import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
@@ -19,10 +20,10 @@
 import java.util.function.Function;
 import java.util.stream.Collectors;

-public class PairedEndAndSplitReadEvidenceCollectionUnitTest extends GATKBaseTest {
+public class CollectSVEvidenceUnitTest extends GATKBaseTest {

     @Test
-    public void testGetReportableDiscordantReadPair() throws Exception {
+    public void testGetReportableDiscordantReadPair() {
         final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(2, 1, 10000);

         // read pair on different contigs
@@ -45,9 +46,9 @@ public void testGetReportableDiscordantReadPair() throws Exception {

         final HashSet<String> observedDiscordantNamesAtThisLocus = new HashSet<>();

-        final PairedEndAndSplitReadEvidenceCollection tool = new PairedEndAndSplitReadEvidenceCollection();
+        final CollectSVEvidence tool = new CollectSVEvidence();

-        PairedEndAndSplitReadEvidenceCollection.DiscordantRead reportableDiscordantReadPair =
+        DiscordantRead reportableDiscordantReadPair =
                 tool.getReportableDiscordantReadPair(discRPDiffContigsFirst, observedDiscordantNamesAtThisLocus, header.getSequenceDictionary());
         Assert.assertNotNull(reportableDiscordantReadPair);
         assertDiscordantReadInfo(reportableDiscordantReadPair, discRPDiffContigsFirst);
@@ -76,7 +77,7 @@ public void testGetReportableDiscordantReadPair() throws Exception {

     }

-    private void assertDiscordantReadInfo(final PairedEndAndSplitReadEvidenceCollection.DiscordantRead reportableDiscordantReadPair, final GATKRead gatkRead) {
+    private void assertDiscordantReadInfo(final DiscordantRead reportableDiscordantReadPair, final GATKRead gatkRead) {
         Assert.assertEquals(reportableDiscordantReadPair.getName(), gatkRead.getName());
         Assert.assertEquals(reportableDiscordantReadPair.getContig(), gatkRead.getContig());
         Assert.assertEquals(reportableDiscordantReadPair.getStart(), gatkRead.getStart());
@@ -85,58 +86,54 @@ private void assertDiscordantReadInfo(final PairedEndAndSplitReadEvidenceCollect
     }

     // Workaround for use of generic class with Mockito
-    private class SplitReadFeatureOutputStream extends FeatureOutputStream<SplitReadEvidence> {
+    private static class SplitReadFeatureOutputStream extends FeatureOutputStream<SplitReadEvidence> {
         public SplitReadFeatureOutputStream() {
-            super(new GATKPath(""), new SplitReadEvidenceCodec(), SplitReadEvidenceCodec::encode,
-                    new SAMSequenceDictionary(), 4);
+            super(new GATKPath(""), new SplitReadEvidenceCodec().getTabixFormat(),
+                    SplitReadEvidenceCodec::encode, new SAMSequenceDictionary(), 4);
         }
     }

     @Test
-    public void testCountSplitRead() throws Exception {
+    public void testCountSplitRead() {
         final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(2, 1, 10000);
         final GATKRead rightClip = ArtificialReadUtils.createArtificialRead(header, "rightClip", 0, 1000,
                 ArtificialReadUtils.createRandomReadBases(151, false), ArtificialReadUtils.createRandomReadQuals(151), "100M51S");

-        final FeatureOutputStream<SplitReadEvidence> mockSrWriter = Mockito.mock(SplitReadFeatureOutputStream.class);
+        final SplitReadFeatureOutputStream mockSrWriter = Mockito.mock(SplitReadFeatureOutputStream.class);

-        PairedEndAndSplitReadEvidenceCollection tool = new PairedEndAndSplitReadEvidenceCollection();
-        final PriorityQueue<PairedEndAndSplitReadEvidenceCollection.SplitPos> splitCounts = new PriorityQueue<>(new PairedEndAndSplitReadEvidenceCollection.SplitPosComparator());
+        CollectSVEvidence tool = new CollectSVEvidence();
+        final PriorityQueue<SplitPos> splitCounts = new PriorityQueue<>(new SplitPosComparator());
         tool.sampleName = "sample";

         tool.countSplitRead(rightClip, splitCounts, mockSrWriter);
-        Map<PairedEndAndSplitReadEvidenceCollection.SplitPos, Long> counts = splitCounts.stream().collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
-        Assert.assertEquals(counts.get(new PairedEndAndSplitReadEvidenceCollection.SplitPos(1100, PairedEndAndSplitReadEvidenceCollection.POSITION.RIGHT)).intValue(), 1);
+        Map<SplitPos, Long> counts = splitCounts.stream().collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
+        Assert.assertEquals(counts.get(new SplitPos(1100, POSITION.RIGHT)).intValue(), 1);
         Mockito.verifyZeroInteractions(mockSrWriter);

         final GATKRead rightClip2 = ArtificialReadUtils.createArtificialRead(header, "rightClip2", 0, 1050,
                 ArtificialReadUtils.createRandomReadBases(151, false), ArtificialReadUtils.createRandomReadQuals(151), "50M101S");
         tool.countSplitRead(rightClip2, splitCounts, mockSrWriter);
         counts = splitCounts.stream().collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
-        Assert.assertEquals(counts.get(new PairedEndAndSplitReadEvidenceCollection.SplitPos(1100, PairedEndAndSplitReadEvidenceCollection.POSITION.RIGHT)).intValue(), 2);
+        Assert.assertEquals(counts.get(new SplitPos(1100, POSITION.RIGHT)).intValue(), 2);
         Mockito.verifyZeroInteractions(mockSrWriter);

         final GATKRead leftClip = ArtificialReadUtils.createArtificialRead(header, "leftClip", 0, 1100,
                 ArtificialReadUtils.createRandomReadBases(151, false), ArtificialReadUtils.createRandomReadQuals(151), "20S131M");
         tool.countSplitRead(leftClip, splitCounts, mockSrWriter);
         counts = splitCounts.stream().collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
-        Assert.assertEquals(counts.get(new PairedEndAndSplitReadEvidenceCollection.SplitPos(1100, PairedEndAndSplitReadEvidenceCollection.POSITION.RIGHT)).intValue(), 2);
-        Assert.assertEquals(counts.get(new PairedEndAndSplitReadEvidenceCollection.SplitPos(1100, PairedEndAndSplitReadEvidenceCollection.POSITION.LEFT)).intValue(), 1);
+        Assert.assertEquals(counts.get(new SplitPos(1100, POSITION.RIGHT)).intValue(), 2);
+        Assert.assertEquals(counts.get(new SplitPos(1100, POSITION.LEFT)).intValue(), 1);
         Mockito.verifyZeroInteractions(mockSrWriter);

         final GATKRead leftClipDownstream = ArtificialReadUtils.createArtificialRead(header, "leftClipDownstream", 0, 1600,
                 ArtificialReadUtils.createRandomReadBases(151, false), ArtificialReadUtils.createRandomReadQuals(151), "20S131M");
         tool.countSplitRead(leftClipDownstream, splitCounts, mockSrWriter);
         counts = splitCounts.stream().collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
-        Assert.assertFalse(counts.containsKey(new PairedEndAndSplitReadEvidenceCollection.SplitPos(1100, PairedEndAndSplitReadEvidenceCollection.POSITION.RIGHT)));
-        Assert.assertFalse(counts.containsKey(new PairedEndAndSplitReadEvidenceCollection.SplitPos(1100, PairedEndAndSplitReadEvidenceCollection.POSITION.LEFT)));
-        Assert.assertEquals(counts.get(new PairedEndAndSplitReadEvidenceCollection.SplitPos(1600, PairedEndAndSplitReadEvidenceCollection.POSITION.LEFT)).intValue(), 1);
-        final SplitReadEvidence splitRead1 = new SplitReadEvidence("sample", "1", 1100, 1, false);
-        final SplitReadEvidence splitRead2 = new SplitReadEvidence("sample", "1", 1100, 2, true);
-        Mockito.verify(mockSrWriter).add(splitRead1);
-        Mockito.verify(mockSrWriter).add(splitRead2);
+        Assert.assertFalse(counts.containsKey(new SplitPos(1100, POSITION.RIGHT)));
+        Assert.assertFalse(counts.containsKey(new SplitPos(1100, POSITION.LEFT)));
+        Assert.assertEquals(counts.get(new SplitPos(1600, POSITION.LEFT)).intValue(), 1);
+        Mockito.verify(mockSrWriter).write(new SplitReadEvidence(tool.sampleName, "1", 1100, 1, false));
+        Mockito.verify(mockSrWriter).write(new SplitReadEvidence(tool.sampleName, "1", 1100, 2, true));
         Mockito.verifyNoMoreInteractions(mockSrWriter);
-
     }
-
 }
\ No newline at end of file
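
The assertions above encode the clip-position convention for split reads: a trailing soft clip (100M51S starting at 1000) is counted at the alignment end plus one (1100, RIGHT), while a leading soft clip (20S131M starting at 1100) is counted at the alignment start itself (1100, LEFT), so both reads support the same candidate breakpoint. A hypothetical restatement of that convention (not the tool's actual implementation, which also applies clipping thresholds):

    // assumed imports: htsjdk.samtools.CigarElement, htsjdk.samtools.CigarOperator, java.util.List
    static SplitPos splitPosOf( final GATKRead read ) {
        final List<CigarElement> elements = read.getCigarElements();
        if ( elements.get(elements.size() - 1).getOperator() == CigarOperator.SOFT_CLIP ) {
            return new SplitPos(read.getEnd() + 1, POSITION.RIGHT);  // 1000 + 100M -> 1100
        }
        return new SplitPos(read.getStart(), POSITION.LEFT);         // 20S131M at 1100 -> 1100
    }
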
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalTreeTest.java b/src/test/java/org/broadinstitute/hellbender/utils/SVIntervalTreeTest.java
similarity index 98%
rename from src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalTreeTest.java
rename to src/test/java/org/broadinstitute/hellbender/utils/SVIntervalTreeTest.java
index eec43c1e841..c46c6a55327 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalTreeTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/utils/SVIntervalTreeTest.java
@@ -1,6 +1,8 @@
-package org.broadinstitute.hellbender.tools.spark.sv.utils;
+package org.broadinstitute.hellbender.utils;

 import org.broadinstitute.hellbender.GATKBaseTest;
+import org.broadinstitute.hellbender.utils.SVInterval;
+import org.broadinstitute.hellbender.utils.SVIntervalTree;
 import org.testng.Assert;
 import org.testng.annotations.Test;

diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/SVIntervalUnitTest.java
similarity index 96%
rename from src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalUnitTest.java
rename to src/test/java/org/broadinstitute/hellbender/utils/SVIntervalUnitTest.java
index 3c05a3924b9..9e4e9252616 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/utils/SVIntervalUnitTest.java
@@ -1,6 +1,7 @@
-package org.broadinstitute.hellbender.tools.spark.sv.utils;
+package org.broadinstitute.hellbender.utils;

 import org.broadinstitute.hellbender.GATKBaseTest;
+import org.broadinstitute.hellbender.utils.SVInterval;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;

diff --git a/src/test/java/org/broadinstitute/hellbender/utils/io/BlockCompressIntervalStreamUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/io/BlockCompressIntervalStreamUnitTest.java
new file mode 100644
index 00000000000..358c3520c1c
--- /dev/null
+++ b/src/test/java/org/broadinstitute/hellbender/utils/io/BlockCompressIntervalStreamUnitTest.java
@@ -0,0 +1,178 @@
+package org.broadinstitute.hellbender.utils.io;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.seekablestream.ByteArraySeekableStream;
+import htsjdk.samtools.util.LocationAware;
+import htsjdk.tribble.*;
+import org.broadinstitute.hellbender.GATKBaseTest;
+import org.broadinstitute.hellbender.utils.codecs.FeaturesHeader;
+import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Reader;
+import org.broadinstitute.hellbender.utils.io.BlockCompressedIntervalStream.Writer;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.*;
+import java.util.Collections;
+
+public class BlockCompressIntervalStreamUnitTest extends GATKBaseTest {
+    private static int POS_START = 1000;
+    private static int POS_INC = 1100;
+    private static int POS_END = 10000000;
+    private static int FEATURE_LENGTH = 2000;
+
+    private static final SAMSequenceDictionary dict = new SAMSequenceDictionary();
+    static {
+        dict.addSequence(new SAMSequenceRecord("21", 46709983));
+        dict.addSequence(new SAMSequenceRecord("22", 50818468));
+    }
+
+    private static void write( final SimpleFeature feature, final Writer<SimpleFeature> writer ) throws IOException {
+        final DataOutputStream dos = writer.getStream();
+        dos.writeInt(writer.getContigIndex(feature.getContig()));
+        dos.writeInt(feature.getStart());
+        dos.writeInt(feature.getEnd());
+    }
+
+    private static class SimpleFeatureCodec implements FeatureCodec<SimpleFeature, Reader<SimpleFeature>> {
+
+        @Override
+        public Feature decodeLoc( Reader<SimpleFeature> simpleFeatureReader ) throws IOException {
+            return null;
+        }
+
+        @Override
+        public SimpleFeature decode( Reader<SimpleFeature> reader ) throws IOException {
+            final DataInputStream dis = reader.getStream();
+            final String contig = reader.getSequenceNames().get(dis.readInt());
+            return new SimpleFeature(contig, dis.readInt(), dis.readInt());
+        }
+
+        @Override
+        public FeatureCodecHeader readHeader( Reader<SimpleFeature> simpleFeatureReader ) throws IOException {
+            return null;
+        }
+
+        @Override
+        public Class<SimpleFeature> getFeatureType() {
+            return null;
+        }
+
+        @Override
+        public Reader<SimpleFeature> makeSourceFromStream( InputStream bufferedInputStream ) {
+            return null;
+        }
+
+        @Override
+        public LocationAware makeIndexableSourceFromStream( InputStream inputStream ) {
+            return null;
+        }
+
+        @Override
+        public boolean isDone( Reader<SimpleFeature> simpleFeatureReader ) {
+            return false;
+        }
+
+        @Override
+        public void close( Reader<SimpleFeature> simpleFeatureReader ) {
+
+        }
+
+        @Override
+        public boolean canDecode( String path ) {
+            return false;
+        }
+    }
+
+    @Test
+    public void testRoundTrip() {
+        final ByteArrayOutputStream os = new ByteArrayOutputStream(200000);
+        final FeaturesHeader header =
+                new FeaturesHeader(SimpleFeature.class.getSimpleName(), "1", dict, Collections.emptyList());
+        final Writer<SimpleFeature> writer =
+                new Writer<>("in-memory stream", os, header, BlockCompressIntervalStreamUnitTest::write);
+        for ( final SAMSequenceRecord rec : dict.getSequences() ) {
+            final String contig = rec.getSequenceName();
+            for ( int start = POS_START; start < POS_END; start += POS_INC ) {
+                final SimpleFeature feature = new SimpleFeature(contig, start, start + FEATURE_LENGTH);
+                writer.write(feature);
+            }
+        }
+        writer.close();
+        final ByteArraySeekableStream ss = new ByteArraySeekableStream(os.toByteArray());
+        final SimpleFeatureCodec codec = new SimpleFeatureCodec();
+        final BlockCompressedIntervalStream.Reader<SimpleFeature> reader =
+                new BlockCompressedIntervalStream.Reader<>("in-memory stream", ss, codec);
+        for ( final SAMSequenceRecord rec : dict.getSequences() ) {
+            final String contig = rec.getSequenceName();
+            for ( int start = POS_START; start < POS_END; start += POS_INC ) {
+                final SimpleFeature feature = new SimpleFeature(contig, start, start + FEATURE_LENGTH);
+                final SimpleFeature recoveredFeature = reader.readStream();
+                Assert.assertEquals(feature.getContig(), recoveredFeature.getContig());
+                Assert.assertEquals(feature.getStart(), recoveredFeature.getStart());
+                Assert.assertEquals(feature.getEnd(), recoveredFeature.getEnd());
+            }
+        }
+        reader.close();
+    }
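
Each record written in testRoundTrip is exactly three ints, so the size of the stream this exercises follows directly from the POS_* constants and the 12-byte layout defined by write() and decode():

    final int featuresPerContig = (POS_END - POS_START + POS_INC - 1) / POS_INC; // 9090
    final int payloadBytes = 2 * featuresPerContig * 3 * Integer.BYTES;          // 218,160 bytes
    // A BGZF block holds at most 64 KiB, so this payload necessarily spans several
    // compressed blocks -- which is what makes the round trip a meaningful test.
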
+
+    @Test
+    public void testQuery() throws IOException {
+        final ByteArrayOutputStream os = new ByteArrayOutputStream(200000);
+        final FeaturesHeader header =
+                new FeaturesHeader(SimpleFeature.class.getSimpleName(), "1", dict, Collections.emptyList());
+        final Writer<SimpleFeature> writer =
+                new Writer<>("in-memory stream", os, header, BlockCompressIntervalStreamUnitTest::write);
+
+        // write an initial gigantic feature
+        final String contig0 = dict.getSequence(0).getSequenceName();
+        final SimpleFeature bigFeature = new SimpleFeature(contig0, POS_START - 10, POS_END);
+        writer.write(bigFeature);
+
+        for ( final SAMSequenceRecord rec : dict.getSequences() ) {
+            final String contig = rec.getSequenceName();
+            for ( int start = POS_START; start < POS_END; start += POS_INC ) {
+                final SimpleFeature feature = new SimpleFeature(contig, start, start + FEATURE_LENGTH);
+                writer.write(feature);
+            }
+        }
+        writer.close();
+
+        final ByteArraySeekableStream ss = new ByteArraySeekableStream(os.toByteArray());
+        final SimpleFeatureCodec codec = new SimpleFeatureCodec();
+        final BlockCompressedIntervalStream.Reader<SimpleFeature> reader =
+                new BlockCompressedIntervalStream.Reader<>("in-memory stream", ss, codec);
+        final CloseableTribbleIterator<SimpleFeature> itr =
+                reader.query(contig0, POS_START + 10, POS_START + 20);
+        Assert.assertTrue(itr.hasNext());
+        final SimpleFeature recoveredBigFeature = itr.next();
+        Assert.assertEquals(bigFeature.getContig(), recoveredBigFeature.getContig());
+        Assert.assertEquals(bigFeature.getStart(), recoveredBigFeature.getStart());
+        Assert.assertEquals(bigFeature.getEnd(), recoveredBigFeature.getEnd());
+        Assert.assertTrue(itr.hasNext());
+        final SimpleFeature feature = new SimpleFeature(contig0, POS_START, POS_START + FEATURE_LENGTH);
+        final SimpleFeature recoveredFeature = itr.next();
+        Assert.assertEquals(feature.getContig(), recoveredFeature.getContig());
+        Assert.assertEquals(feature.getStart(), recoveredFeature.getStart());
+        Assert.assertEquals(feature.getEnd(), recoveredFeature.getEnd());
+        Assert.assertFalse(itr.hasNext());
+        itr.close();
+        reader.close();
+
+        final String contig1 = dict.getSequence(1).getSequenceName();
+        // can't requery an in-memory stream -- start fresh
+        final ByteArraySeekableStream ss2 = new ByteArraySeekableStream(os.toByteArray());
+        final BlockCompressedIntervalStream.Reader<SimpleFeature> reader2 =
+                new BlockCompressedIntervalStream.Reader<>("in-memory stream", ss2, codec);
+        final CloseableTribbleIterator<SimpleFeature> itr2 =
+                reader2.query(contig1, POS_START + 10, POS_START + 20);
+        final SimpleFeature feature1 = new SimpleFeature(contig1, POS_START, POS_START + FEATURE_LENGTH);
+        final SimpleFeature recoveredFeature1 = itr2.next();
+        Assert.assertEquals(feature1.getContig(), recoveredFeature1.getContig());
+        Assert.assertEquals(feature1.getStart(), recoveredFeature1.getStart());
+        Assert.assertEquals(feature1.getEnd(), recoveredFeature1.getEnd());
+        Assert.assertFalse(itr2.hasNext());
+        itr2.close();
+        reader2.close();
+    }
+}
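
testQuery's "gigantic feature" is the interesting case: it starts before the queried window (POS_START - 10 versus a query at POS_START + 10 through POS_START + 20) yet must still be returned, so the reader's index has to account for features that begin long before the block containing the query start. The overlap criterion the assertions encode, restated as a hypothetical predicate (htsjdk Feature coordinates are 1-based and end-inclusive):

    static boolean overlaps( final Feature feature, final String contig, final int start, final int end ) {
        return feature.getContig().equals(contig)
                && feature.getStart() <= end      // begins at or before the window's end
                && feature.getEnd() >= start;     // ends at or after the window's start
    }

Under that predicate, the query on contig "21" matches exactly two features, the gigantic one and the first regular one, which is precisely what the iterator assertions check.
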
diff --git a/src/test/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStreamUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStreamUnitTest.java
index 43c460a3fb7..c0c4dc4c009 100644
--- a/src/test/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStreamUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStreamUnitTest.java
@@ -71,9 +71,9 @@ public void test(final ArrayList featureList,
                      final String expectedHeader) throws IOException {
         final File tempDir = IOUtils.createTempDir(FeatureOutputStream.class.getSimpleName());
         final Path outFilePath = Paths.get(tempDir.toString(), getClass().getSimpleName() + extension);
-        final FeatureOutputStream stream = new FeatureOutputStream(
+        final FeatureOutputStream stream = new FeatureOutputStream<>(
                 new GATKPath(outFilePath.toString()),
-                codec,
+                codec.getTabixFormat(),
                 FeatureOutputStreamUnitTest::encodeSVEvidenceFeature,
                 dictionary,
                 4
@@ -81,9 +81,9 @@ public void test(final ArrayList featureList,
         testWithStream(stream, featureList, outFilePath, codec, expectedHeader, false);

         final Path outFilePathGz = Paths.get(outFilePath + ".gz");
-        final FeatureOutputStream streamGz = new FeatureOutputStream(
+        final FeatureOutputStream streamGz = new FeatureOutputStream<>(
                 new GATKPath(outFilePathGz.toString()),
-                codec,
+                codec.getTabixFormat(),
                 FeatureOutputStreamUnitTest::encodeSVEvidenceFeature,
                 dictionary,
                 4
@@ -98,7 +98,7 @@ private void testWithStream(final FeatureOutputStream str
         if (expectedHeader != null) {
             stream.writeHeader(expectedHeader);
         }
-        featureList.stream().forEachOrdered(s -> stream.add(s));
+        featureList.stream().forEachOrdered(s -> stream.write(s));
         stream.close();
         if (indexed) {
             Assert.assertTrue(Files.exists(outIndexPath), "Index does not exist");
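
Together with the hunks above, the updated FeatureOutputStream usage pattern is: construct the stream from the codec's TabixFormat rather than the codec itself, and emit records with write() instead of add(). A hedged sketch of that pattern (output path hypothetical; the trailing 4 is assumed to be a compression level, matching its use in these tests):

    // assumed context: a SAMSequenceDictionary named dictionary is in scope
    final FeatureOutputStream<SplitReadEvidence> out = new FeatureOutputStream<>(
            new GATKPath("sample.sr.txt.gz"),                 // hypothetical output
            new SplitReadEvidenceCodec().getTabixFormat(),
            SplitReadEvidenceCodec::encode,
            dictionary,
            4);
    out.write(new SplitReadEvidence("sample", "1", 1100, 1, false));
    out.close();
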