Skip to content

Commit

Permalink
Adding a test and small features to var store branch (#6761)
Browse files Browse the repository at this point in the history
  • Loading branch information
meganshand authored and ahaessly committed Sep 11, 2020
1 parent bca8e1e commit 2ad36f7
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.tsv.SimpleXSVWriter;

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
Expand All @@ -31,15 +32,15 @@ public static List<String> getHeaders() {
return Arrays.stream(ArrayMetadataTsvCreator.HeaderFieldEnum.values()).map(String::valueOf).collect(Collectors.toList());
}

public void createRow(String sampleName, String sampleId, String tableNumberPrefix) {
public void createRow(String sampleName, String sampleId, String tableNumberPrefix, File outputDirectory) {
// if the metadata tsvs don't exist yet -- create them
try {
// Create a metadata file to go into the metadata dir for _this_ sample
// TODO--this should just be one file per sample set?
final String sampleMetadataName = IngestConstants.metadataFilePrefix + tableNumberPrefix + sampleName + IngestConstants.FILETYPE;
final File sampleMetadataName = new File (outputDirectory, IngestConstants.metadataFilePrefix + tableNumberPrefix + sampleName + IngestConstants.FILETYPE);
// write header to it
List<String> sampleListHeader = ArrayMetadataTsvCreator.getHeaders();
sampleMetadataWriter = new SimpleXSVWriter(Paths.get(sampleMetadataName), IngestConstants.SEPARATOR);
sampleMetadataWriter = new SimpleXSVWriter(sampleMetadataName.toPath(), IngestConstants.SEPARATOR);
sampleMetadataWriter.setHeaderLine(sampleListHeader);

final List<String> TSVLineToCreateSampleMetadata = createSampleListRow(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.tools.variantdb.arrays;

import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeader;
import org.apache.logging.log4j.LogManager;
Expand Down Expand Up @@ -71,9 +72,19 @@ public final class CreateArrayIngestFiles extends VariantWalker {
optional = true)
private String refVersion = "37";

// Command-line argument "output-directory": the directory for the output tsv files.
// Declared optional; the field initializer is new File("."), which presumably
// serves as the default when the argument is omitted -- confirm against the
// argument parser's handling of optional arguments.
@Argument(
fullName = "output-directory",
doc = "directory for output tsv files",
optional = true)
private File outputDir = new File(".");


@Override
public void onTraversalStart() {
//set up output directory
if (!outputDir.exists() && !outputDir.mkdir()) {
throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
}

// Get sample name
final VCFHeader inputVCFHeader = getHeaderForVariants();
Expand All @@ -92,7 +103,7 @@ public void onTraversalStart() {
String tableNumberPrefix = String.format("%03d_", sampleTableNumber);

metadataTsvCreator = new ArrayMetadataTsvCreator();
metadataTsvCreator.createRow(sampleName, sampleId, tableNumberPrefix);
metadataTsvCreator.createRow(sampleName, sampleId, tableNumberPrefix, outputDir);

Map<String, ProbeInfo> probeNameMap;
if (probeCsvFile == null) {
Expand All @@ -104,7 +115,7 @@ public void onTraversalStart() {
// Set reference version
ChromosomeEnum.setRefVersion(refVersion);

tsvCreator = new RawArrayTsvCreator(sampleName, sampleId, tableNumberPrefix, probeNameMap, useCompressedData);
tsvCreator = new RawArrayTsvCreator(sampleName, sampleId, tableNumberPrefix, probeNameMap, useCompressedData, outputDir);
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.broadinstitute.hellbender.utils.tsv.SimpleXSVWriter;


import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
Expand Down Expand Up @@ -43,16 +44,16 @@ String getValue() {
}
}

public RawArrayTsvCreator(final String sampleName, final String sampleId, final String tableNumberPrefix, final Map<String, ProbeInfo> probeDataByName, boolean useCompressedData) {
public RawArrayTsvCreator(final String sampleName, final String sampleId, final String tableNumberPrefix, final Map<String, ProbeInfo> probeDataByName, boolean useCompressedData, final File outputDirectory) {
this.sampleId = sampleId;
this.probeDataByName = probeDataByName;
this.useCompressedData = useCompressedData;
try {
// Create a raw file to go into the raw dir for _this_ sample
final String rawOutputName = RAW_FILETYPE_PREFIX + tableNumberPrefix + sampleName + IngestConstants.FILETYPE;
final File rawOutputName = new File(outputDirectory, RAW_FILETYPE_PREFIX + tableNumberPrefix + sampleName + IngestConstants.FILETYPE);
// write header to it
List<String> rawHeader = RawArrayTsvCreator.getHeaders();
rawArrayWriter = new SimpleXSVWriter(Paths.get(rawOutputName), IngestConstants.SEPARATOR);
List<String> rawHeader = RawArrayTsvCreator.getHeaders(useCompressedData);
rawArrayWriter = new SimpleXSVWriter(rawOutputName.toPath(), IngestConstants.SEPARATOR);
rawArrayWriter.setHeaderLine(rawHeader);
} catch (final IOException e) {
throw new UserException("Could not create raw outputs", e);
Expand Down Expand Up @@ -80,19 +81,25 @@ public List<String> createRow(final VariantContext variant, final String sampleI
return row;
}

// Returns the raw-array header names as a List<String>, built by mapping
// String::valueOf over an array of RawArrayFieldEnum constants.
// NOTE(review): this span is a rendered diff without +/- markers. The no-arg
// signature and its RawArrayFieldEnum.values() return look like the pre-change
// version, and the boolean-parameter signature below looks like its
// replacement -- confirm against the commit before treating both as live code.
public static List<String> getHeaders() {
return Arrays.stream(RawArrayFieldEnum.values()).map(String::valueOf).collect(Collectors.toList());
public static List<String> getHeaders(final boolean useCompressedData) {
// Compressed mode: headers come from getCompressedRawArrayFieldEnums().
if (useCompressedData) {
return Arrays.stream(RawArrayFieldEnum.getCompressedRawArrayFieldEnums()).map(String::valueOf).collect(Collectors.toList());
}
// Otherwise: headers come from getUncompressedRawArrayFieldEnums().
return Arrays.stream(RawArrayFieldEnum.getUncompressedRawArrayFieldEnums()).map(String::valueOf).collect(Collectors.toList());
}

// Writes one row for the given variant to rawArrayWriter, unless the variant's
// filters contain "ZEROED_OUT_ASSAY", in which case nothing is written.
// readsContext, referenceContext and featureContext are not referenced in the
// visible body.
// Throws UserException when the row built by createRow does not have the
// expected number of fields.
// NOTE(review): this span is a rendered diff without +/- markers. The two
// consecutive "if (rowData.size() == ...)" lines look like the pre-change
// condition (RawArrayFieldEnum.values().length) followed by its replacement
// (length) -- confirm against the commit.
public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
if (!variant.getFilters().contains("ZEROED_OUT_ASSAY")) {
final List<String> rowData = createRow(variant, sampleId);

// Expected field count depends on whether compressed or uncompressed fields are in use.
int length = useCompressedData ? RawArrayFieldEnum.getCompressedRawArrayFieldEnums().length : RawArrayFieldEnum.getUncompressedRawArrayFieldEnums().length;
// write the row to the XSV
if (rowData.size() == RawArrayFieldEnum.values().length) {
if (rowData.size() == length) {
SimpleXSVWriter.LineBuilder rawLine = rawArrayWriter.getNewLineBuilder();
rawLine.setRow(rowData);
rawLine.write();
} else {
// A size mismatch is reported as an error rather than silently dropping the row.
throw new UserException("Length of row data didn't match length of expected row data.");
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
##fileformat=VCFv4.2
##FORMAT=<ID=BAF,Number=1,Type=Float,Description="B Allele Frequency">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=IGC,Number=1,Type=Float,Description="Illumina GenCall Confidence Score">
##FORMAT=<ID=LRR,Number=1,Type=Float,Description="Log R Ratio">
##FORMAT=<ID=NORMX,Number=1,Type=Float,Description="Normalized X intensity">
##FORMAT=<ID=NORMY,Number=1,Type=Float,Description="Normalized Y intensity">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=ALLELE_A,Number=1,Type=String,Description="A allele">
##INFO=<ID=ALLELE_B,Number=1,Type=String,Description="B allele">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##contig=<ID=1,length=249250621,assembly=GRCh37>
##expectedGender=Unknown
##extendedIlluminaManifestVersion=1.5
##fingerprintGender=Unknown
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testSample
1 5700115 1:5700115-A-T A T . . AC=0;AF=0.00;ALLELE_A=A*;ALLELE_B=T;AN=2; GT:BAF:IGC:LRR:NORMX:NORMY 0/0:0:0.58:-0.235:1.476:0.057
1 5700116 1:5700116-C-G C G . . AC=0;AF=0.00;ALLELE_A=C*;ALLELE_B=G;AN=2; GT:BAF:IGC:LRR:NORMX:NORMY 0/1:0:0.23:-0.111:1.093:0.118
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
sample_name sample_id
testSample 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ProbeId,Name,GenomeBuild,Chr,Position,Ref,AlleleA,AlleleB,build37Flag
1,1:5700115-A-T,37,1,5700115,A,A,T,
2,1:5700116-C-G,37,1,5700116,C,C,G,
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
sample_id probe_id GT_encoded NORMX NORMY BAF LRR
1 1 AA 1.476 0.057 0 -0.235
1 2 AB 1.093 0.118 0 -0.111
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1,testSample

0 comments on commit 2ad36f7

Please sign in to comment.