Skip to content

Commit

Permalink
support for non-AS called data (#6975)
Browse files Browse the repository at this point in the history
* support for non-AS called data

* fixed null case
  • Loading branch information
kcibul authored Nov 21, 2020
1 parent 2610b20 commit 5f40379
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_StrandBiasTest;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.bigquery.BigQueryUtils;
import org.broadinstitute.hellbender.utils.bigquery.GATKAvroReader;
import org.broadinstitute.hellbender.utils.bigquery.StorageAPIAvroReader;
Expand Down Expand Up @@ -130,6 +129,11 @@ private void processVQSRRecordForPosition(GenericRecord rec) {
String ref = rec.get("ref").toString();
// Fetch the raw field value first: calling toString() directly on the result of
// rec.get("allele") would throw a NullPointerException for a null allele before the
// guard below could ever run (assumes the record returns null for a null/unset
// field -- TODO confirm against the Avro schema used for this table).
final Object alleleValue = rec.get("allele");
String allele = (alleleValue == null) ? null : alleleValue.toString();

// Skip (with a loud warning) rather than fail on records with a missing or empty alternate allele.
if (allele == null || allele.equals("")) {
    logger.warn("SEVERE WARNING: skipping " + contig + ":" + position + "(location="+location+") because it has a null alternate allele!");
    return;
}

// Numbers are returned as Long (sci notation)
Double qual = Double.valueOf(rec.get(SchemaUtils.RAW_QUAL).toString());

Expand All @@ -147,10 +151,10 @@ private void processVQSRRecordForPosition(GenericRecord rec) {
Double raw_ad_gt_1 = Double.valueOf(rec.get("RAW_AD_GT_1").toString());

// TODO: KCIBUL QUESTION -- if we skip this... we won't have YNG Info @ extraction time?
if (raw_ad == 0) {
logger.info("skipping " + position + " because it has no alternate reads!");
return;
}
// if (raw_ad == 0) {
// logger.info("skipping " + contig + ":" + position + "(location="+location+") because it has no alternate reads!");
// return;
// }

int sb_ref_plus = Double.valueOf(rec.get("SB_REF_PLUS").toString()).intValue();
int sb_ref_minus = Double.valueOf(rec.get("SB_REF_MINUS").toString()).intValue();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,31 @@ public String getColumnValue(final VariantContext variant) {

AS_RAW_MQRankSum { // TODO -- maybe rely on 1/1 for call_GT, also get rid of the | at the beginning
public String getColumnValue(final VariantContext variant) {
// e.g. AS_RAW_MQRankSum=|1.4,1|NaN;
String out = getAttribute(variant, GATKVCFConstants.AS_RAW_MAP_QUAL_RANK_SUM_KEY, null);

if (out == null) {
// Try to use non-AS version
// TODO: it looks like the AS_RAW version also trims to a single decimal point??
// e.g. MQRankSum=1.465 and turn it into |1.465,1|
String outNotAlleleSpecific = getAttribute(variant, GATKVCFConstants.READ_POS_RANK_SUM_KEY, null);


if ( outNotAlleleSpecific == null || "".equals(outNotAlleleSpecific) || outNotAlleleSpecific.contentEquals("||") || outNotAlleleSpecific.contentEquals("|||") ) {
return "";
}

if (variant.getAlleles().size() == 3) { // GT 0/1 1/1
out = "|" + outNotAlleleSpecific + ",1|";

} else if (variant.getAlleles().size() == 4) { // GT 1/2
// TODO: just replicate rather than distribute, is this right?
out = "|" + outNotAlleleSpecific + ",1|" + outNotAlleleSpecific + ",1|";
} else {
throw new UserException("Expected diploid sample to either have 3 alleles (ref, alt, non-ref) or 4 alleles (ref, alt 1, alt 2, non-ref)");
}
}

if ( out == null || out.contentEquals("||") || out.contentEquals("|||") ) {
out = "";
return out;
Expand Down Expand Up @@ -176,7 +200,30 @@ public String getColumnValue(final VariantContext variant) {

AS_RAW_ReadPosRankSum { // TODO -- maybe rely on 1/1 for call_GT
public String getColumnValue(final VariantContext variant) {
// e.g. AS_RAW_ReadPosRankSum=|-0.3,1|0.6,1
String out = getAttribute(variant, GATKVCFConstants.AS_RAW_READ_POS_RANK_SUM_KEY, null);

if (out == null) {
// Try to use non-AS version
// TODO: it looks like the AS_RAW version also trims to a single decimal point??
// e.g. ReadPosRankSum=-0.511 and turn it into |-0.511,1|
String outNotAlleleSpecific = getAttribute(variant, GATKVCFConstants.READ_POS_RANK_SUM_KEY, null);

if ( outNotAlleleSpecific == null || "".equals(outNotAlleleSpecific) || outNotAlleleSpecific.contentEquals("||") || outNotAlleleSpecific.contentEquals("|||") ) {
return "";
}

if (variant.getAlleles().size() == 3) { // GT 0/1 1/1
out = "|" + outNotAlleleSpecific + ",1|";

} else if (variant.getAlleles().size() == 4) { // GT 1/2
// TODO: just replicate rather than distribute, is this right?
out = "|" + outNotAlleleSpecific + ",1|" + outNotAlleleSpecific + ",1|";
} else {
throw new UserException("Expected diploid sample to either have 3 alleles (ref, alt, non-ref) or 4 alleles (ref, alt 1, alt 2, non-ref)");
}
}

if (out == null || out.contentEquals("||") || out.contentEquals("|||") ) {
out = "";
return out;
Expand Down

0 comments on commit 5f40379

Please sign in to comment.