From 2ad36f78f79fce71a5273bbee1707731d90d27cc Mon Sep 17 00:00:00 2001 From: meganshand Date: Mon, 24 Aug 2020 09:10:47 -0400 Subject: [PATCH] Adding a test and small features to var store branch (#6761) --- .../arrays/ArrayMetadataTsvCreator.java | 7 ++++--- .../arrays/CreateArrayIngestFiles.java | 15 +++++++++++-- .../variantdb/arrays/RawArrayTsvCreator.java | 21 ++++++++++++------- .../tools/variantdb/arrays/array.vcf | 19 +++++++++++++++++ .../arrays/metadata_001_testSample.tsv | 2 ++ .../tools/variantdb/arrays/probe_info.csv | 3 +++ .../variantdb/arrays/raw_001_testSample.tsv | 3 +++ .../tools/variantdb/arrays/sampleMap.csv | 1 + 8 files changed, 59 insertions(+), 12 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/array.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/metadata_001_testSample.tsv create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/probe_info.csv create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/raw_001_testSample.tsv create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/sampleMap.csv diff --git a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/ArrayMetadataTsvCreator.java b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/ArrayMetadataTsvCreator.java index 998edbb8f22..4bc3cf1eb72 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/ArrayMetadataTsvCreator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/ArrayMetadataTsvCreator.java @@ -8,6 +8,7 @@ import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.tsv.SimpleXSVWriter; +import java.io.File; import java.io.IOException; import java.nio.file.Paths; import java.util.ArrayList; @@ -31,15 +32,15 @@ public static List getHeaders() { return Arrays.stream(ArrayMetadataTsvCreator.HeaderFieldEnum.values()).map(String::valueOf).collect(Collectors.toList()); } - public void createRow(String sampleName, String sampleId, String tableNumberPrefix) { + public void createRow(String sampleName, String sampleId, String tableNumberPrefix, File outputDirectory) { // if the metadata tsvs don't exist yet -- create them try { // Create a metadata file to go into the metadata dir for _this_ sample // TODO--this should just be one file per sample set? - final String sampleMetadataName = IngestConstants.metadataFilePrefix + tableNumberPrefix + sampleName + IngestConstants.FILETYPE; + final File sampleMetadataName = new File (outputDirectory, IngestConstants.metadataFilePrefix + tableNumberPrefix + sampleName + IngestConstants.FILETYPE); // write header to it List sampleListHeader = ArrayMetadataTsvCreator.getHeaders(); - sampleMetadataWriter = new SimpleXSVWriter(Paths.get(sampleMetadataName), IngestConstants.SEPARATOR); + sampleMetadataWriter = new SimpleXSVWriter(sampleMetadataName.toPath(), IngestConstants.SEPARATOR); sampleMetadataWriter.setHeaderLine(sampleListHeader); final List TSVLineToCreateSampleMetadata = createSampleListRow( diff --git a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/CreateArrayIngestFiles.java b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/CreateArrayIngestFiles.java index cdaf4a81129..71791fabd57 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/CreateArrayIngestFiles.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/CreateArrayIngestFiles.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.variantdb.arrays; +import htsjdk.samtools.util.RuntimeIOException; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; import org.apache.logging.log4j.LogManager; @@ -71,9 +72,19 @@ public final class CreateArrayIngestFiles extends VariantWalker { optional = true) private String refVersion = "37"; + @Argument( + fullName = "output-directory", + doc = "directory for output tsv files", + optional = true) + private File outputDir = new File("."); + @Override public void onTraversalStart() { + //set up output directory + if (!outputDir.exists() && !outputDir.mkdir()) { + throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath()); + } // Get sample name final VCFHeader inputVCFHeader = getHeaderForVariants(); @@ -92,7 +103,7 @@ public void onTraversalStart() { String tableNumberPrefix = String.format("%03d_", sampleTableNumber); metadataTsvCreator = new ArrayMetadataTsvCreator(); - metadataTsvCreator.createRow(sampleName, sampleId, tableNumberPrefix); + metadataTsvCreator.createRow(sampleName, sampleId, tableNumberPrefix, outputDir); Map probeNameMap; if (probeCsvFile == null) { @@ -104,7 +115,7 @@ public void onTraversalStart() { // Set reference version ChromosomeEnum.setRefVersion(refVersion); - tsvCreator = new RawArrayTsvCreator(sampleName, sampleId, tableNumberPrefix, probeNameMap, useCompressedData); + tsvCreator = new RawArrayTsvCreator(sampleName, sampleId, tableNumberPrefix, probeNameMap, useCompressedData, outputDir); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/RawArrayTsvCreator.java b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/RawArrayTsvCreator.java index 889bea232cc..2d13d2d7c7c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/RawArrayTsvCreator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/arrays/RawArrayTsvCreator.java @@ -11,6 +11,7 @@ import org.broadinstitute.hellbender.utils.tsv.SimpleXSVWriter; +import java.io.File; import java.io.IOException; import java.nio.file.Paths; import java.util.ArrayList; @@ -43,16 +44,16 @@ String getValue() { } } - public RawArrayTsvCreator(final String sampleName, final String sampleId, final String tableNumberPrefix, final Map probeDataByName, boolean useCompressedData) { + public RawArrayTsvCreator(final String sampleName, final String sampleId, final String tableNumberPrefix, final Map probeDataByName, boolean useCompressedData, final File outputDirectory) { this.sampleId = sampleId; this.probeDataByName = probeDataByName; this.useCompressedData = useCompressedData; try { // Create a raw file to go into the raw dir for _this_ sample - final String rawOutputName = RAW_FILETYPE_PREFIX + tableNumberPrefix + sampleName + IngestConstants.FILETYPE; + final File rawOutputName = new File(outputDirectory, RAW_FILETYPE_PREFIX + tableNumberPrefix + sampleName + IngestConstants.FILETYPE); // write header to it - List rawHeader = RawArrayTsvCreator.getHeaders(); - rawArrayWriter = new SimpleXSVWriter(Paths.get(rawOutputName), IngestConstants.SEPARATOR); + List rawHeader = RawArrayTsvCreator.getHeaders(useCompressedData); + rawArrayWriter = new SimpleXSVWriter(rawOutputName.toPath(), IngestConstants.SEPARATOR); rawArrayWriter.setHeaderLine(rawHeader); } catch (final IOException e) { throw new UserException("Could not create raw outputs", e); @@ -80,19 +81,25 @@ public List createRow(final VariantContext variant, final String sampleI return row; } - public static List getHeaders() { - return Arrays.stream(RawArrayFieldEnum.values()).map(String::valueOf).collect(Collectors.toList()); + public static List getHeaders(final boolean useCompressedData) { + if (useCompressedData) { + return Arrays.stream(RawArrayFieldEnum.getCompressedRawArrayFieldEnums()).map(String::valueOf).collect(Collectors.toList()); + } + return Arrays.stream(RawArrayFieldEnum.getUncompressedRawArrayFieldEnums()).map(String::valueOf).collect(Collectors.toList()); } public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) { if (!variant.getFilters().contains("ZEROED_OUT_ASSAY")) { final List rowData = createRow(variant, sampleId); + int length = useCompressedData ? RawArrayFieldEnum.getCompressedRawArrayFieldEnums().length : RawArrayFieldEnum.getUncompressedRawArrayFieldEnums().length; // write the row to the XSV - if (rowData.size() == RawArrayFieldEnum.values().length) { + if (rowData.size() == length) { SimpleXSVWriter.LineBuilder rawLine = rawArrayWriter.getNewLineBuilder(); rawLine.setRow(rowData); rawLine.write(); + } else { + throw new UserException("Length of row data didn't match length of expected row data."); } } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/array.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/array.vcf new file mode 100644 index 00000000000..337bbc451d6 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/array.vcf @@ -0,0 +1,19 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##expectedGender=Unknown +##extendedIlluminaManifestVersion=1.5 +##fingerprintGender=Unknown +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testSample +1 5700115 1:5700115-A-T A T . . AC=0;AF=0.00;ALLELE_A=A*;ALLELE_B=T;AN=2; GT:BAF:IGC:LRR:NORMX:NORMY 0/0:0:0.58:-0.235:1.476:0.057 +1 5700116 1:5700116-C-G C G . . AC=0;AF=0.00;ALLELE_A=C*;ALLELE_B=G;AN=2; GT:BAF:IGC:LRR:NORMX:NORMY 0/1:0:0.23:-0.111:1.093:0.118 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/metadata_001_testSample.tsv b/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/metadata_001_testSample.tsv new file mode 100644 index 00000000000..fa116665c61 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/metadata_001_testSample.tsv @@ -0,0 +1,2 @@ +sample_name sample_id +testSample 1 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/probe_info.csv b/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/probe_info.csv new file mode 100644 index 00000000000..580b71263ad --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/probe_info.csv @@ -0,0 +1,3 @@ +ProbeId,Name,GenomeBuild,Chr,Position,Ref,AlleleA,AlleleB,build37Flag +1,1:5700115-A-T,37,1,5700115,A,A,T, +2,1:5700116-C-G,37,1,5700116,C,C,G, diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/raw_001_testSample.tsv b/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/raw_001_testSample.tsv new file mode 100644 index 00000000000..ab883d9d4cf --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/raw_001_testSample.tsv @@ -0,0 +1,3 @@ +sample_id probe_id GT_encoded NORMX NORMY BAF LRR +1 1 AA 1.476 0.057 0 -0.235 +1 2 AB 1.093 0.118 0 -0.111 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/sampleMap.csv b/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/sampleMap.csv new file mode 100644 index 00000000000..99fccede2fc --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/variantdb/arrays/sampleMap.csv @@ -0,0 +1 @@ +1,testSample