diff --git a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractFeaturesEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractFeaturesEngine.java index 5f5038b508b..5637b8ed016 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractFeaturesEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractFeaturesEngine.java @@ -19,7 +19,6 @@ import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_StrandBiasTest; import org.broadinstitute.hellbender.utils.QualityUtils; -import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.bigquery.BigQueryUtils; import org.broadinstitute.hellbender.utils.bigquery.GATKAvroReader; import org.broadinstitute.hellbender.utils.bigquery.StorageAPIAvroReader; @@ -130,6 +129,11 @@ private void processVQSRRecordForPosition(GenericRecord rec) { String ref = rec.get("ref").toString(); String allele = rec.get("allele").toString(); + if (allele == null || allele.equals("")) { + logger.warn("SEVERE WARNING: skipping " + contig + ":" + position + "(location="+location+") because it has a null alternate allele!"); + return; + } + // Numbers are returned as Long (sci notation) Double qual = Double.valueOf(rec.get(SchemaUtils.RAW_QUAL).toString()); @@ -147,10 +151,10 @@ private void processVQSRRecordForPosition(GenericRecord rec) { Double raw_ad_gt_1 = Double.valueOf(rec.get("RAW_AD_GT_1").toString()); // TODO: KCIBUL QUESTION -- if we skip this... we won't have YNG Info @ extraction time? 
- if (raw_ad == 0) { - logger.info("skipping " + position + " because it has no alternate reads!"); - return; - } + // if (raw_ad == 0) { + // logger.info("skipping " + contig + ":" + position + "(location="+location+") because it has no alternate reads!"); + // return; + // } int sb_ref_plus = Double.valueOf(rec.get("SB_REF_PLUS").toString()).intValue(); int sb_ref_minus = Double.valueOf(rec.get("SB_REF_MINUS").toString()).intValue(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/VetFieldEnum.java b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/VetFieldEnum.java index f8d27cd5f98..0a770a28dd8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/VetFieldEnum.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/VetFieldEnum.java @@ -121,7 +121,31 @@ public String getColumnValue(final VariantContext variant) { AS_RAW_MQRankSum { // TODO -- maybe rely on 1/1 for call_GT, also get rid of the | at the beginning public String getColumnValue(final VariantContext variant) { + // e.g. AS_RAW_MQRankSum=|1.4,1|NaN; String out = getAttribute(variant, GATKVCFConstants.AS_RAW_MAP_QUAL_RANK_SUM_KEY, null); + + if (out == null) { + // Try to use non-AS version (MQRankSum, not ReadPosRankSum) + // TODO: it looks like the AS_RAW version also trims to a single decimal point?? + // e.g. MQRankSum=1.465 and turn it into |1.465,1| + String outNotAlleleSpecific = getAttribute(variant, GATKVCFConstants.MAP_QUAL_RANK_SUM_KEY, null); + + + if ( outNotAlleleSpecific == null || "".equals(outNotAlleleSpecific) || outNotAlleleSpecific.contentEquals("||") || outNotAlleleSpecific.contentEquals("|||") ) { + return ""; + } + + if (variant.getAlleles().size() == 3) { // GT 0/1 1/1 + out = "|" + outNotAlleleSpecific + ",1|"; + + } else if (variant.getAlleles().size() == 4) { // GT 1/2 + // TODO: just replicate rather than distribute, is this right? 
+ out = "|" + outNotAlleleSpecific + ",1|" + outNotAlleleSpecific + ",1|"; + } else { + throw new UserException("Expected diploid sample to either have 3 alleles (ref, alt, non-ref) or 4 alleles (ref, alt 1, alt 2, non-ref)"); + } + } + if ( out == null || out.contentEquals("||") || out.contentEquals("|||") ) { out = ""; return out; @@ -176,7 +200,30 @@ public String getColumnValue(final VariantContext variant) { AS_RAW_ReadPosRankSum { // TODO -- maybe rely on 1/1 for call_GT public String getColumnValue(final VariantContext variant) { + // e.g. AS_RAW_ReadPosRankSum=|-0.3,1|0.6,1 String out = getAttribute(variant, GATKVCFConstants.AS_RAW_READ_POS_RANK_SUM_KEY, null); + + if (out == null) { + // Try to use non-AS version + // TODO: it looks like the AS_RAW version also trims to a single decimal point?? + // e.g. ReadPosRankSum=-0.511 and turn it into |-0.511,1| + String outNotAlleleSpecific = getAttribute(variant, GATKVCFConstants.READ_POS_RANK_SUM_KEY, null); + + if ( outNotAlleleSpecific == null || "".equals(outNotAlleleSpecific) || outNotAlleleSpecific.contentEquals("||") || outNotAlleleSpecific.contentEquals("|||") ) { + return ""; + } + + if (variant.getAlleles().size() == 3) { // GT 0/1 1/1 + out = "|" + outNotAlleleSpecific + ",1|"; + + } else if (variant.getAlleles().size() == 4) { // GT 1/2 + // TODO: just replicate rather than distribute, is this right? + out = "|" + outNotAlleleSpecific + ",1|" + outNotAlleleSpecific + ",1|"; + } else { + throw new UserException("Expected diploid sample to either have 3 alleles (ref, alt, non-ref) or 4 alleles (ref, alt 1, alt 2, non-ref)"); + } + } + if (out == null || out.contentEquals("||") || out.contentEquals("|||") ) { out = ""; return out;