-
Notifications
You must be signed in to change notification settings - Fork 598
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
313 Cleanup Extract Cohort params #7293
Changes from all commits
959a1fc
ada58dd
97d910f
d17ae5a
b658743
9b24bf4
68f8ea3
2e3404c
1e42148
2f64b55
ddc2ff6
5ff3e49
e8fabb0
eac55f5
b9172aa
b4c32f2
d3530ab
bac5b0b
1d33ba7
9ce72ed
dafffbc
f064080
b8932da
31512a5
2748c9c
91767ae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,7 +35,9 @@ public class ExtractCohort extends ExtractTool { | |
private static final Logger logger = LogManager.getLogger(ExtractCohort.class); | ||
private ExtractCohortEngine engine; | ||
|
||
@Argument( | ||
public enum VQSLODFilteringType { GENOTYPE, SITES, NONE } | ||
|
||
@Argument( | ||
fullName = "filter-set-info-table", | ||
doc = "Fully qualified name of the filtering set info table to use for cohort extraction", | ||
optional = true | ||
|
@@ -86,29 +88,27 @@ public class ExtractCohort extends ExtractTool { | |
) | ||
private boolean emitPLs = false; | ||
|
||
@Argument( | ||
fullName = "disable-gnarly", | ||
doc = "Disable use of GnarlyGenotyper", | ||
optional = true | ||
) | ||
private boolean disableGnarlyGenotyper = true; | ||
// what if this was a flag input only? | ||
|
||
@Argument( | ||
fullName = "vqslod-filter-genotypes", | ||
doc = "Should VQSLOD filtering be applied at the genotype level", | ||
fullName = "vqslod-filter-by-site", | ||
doc = "If VQSLOD filtering is applied, it should be at a site level. Default is false", | ||
optional = true | ||
) | ||
private boolean performGenotypeVQSLODFiltering = true; | ||
private boolean performSiteSpecificVQSLODFiltering = false; | ||
private VQSLODFilteringType vqslodfilteringType = VQSLODFilteringType.NONE; | ||
|
||
@Argument( | ||
fullName ="snps-truth-sensitivity-filter-level", | ||
mutex = {"snps-lod-score-cutoff"}, | ||
doc = "The truth sensitivity level at which to start filtering SNPs", | ||
optional = true | ||
) | ||
private Double truthSensitivitySNPThreshold = null; | ||
|
||
@Argument( | ||
fullName = "indels-truth-sensitivity-filter-level", | ||
mutex = {"indels-lod-score-cutoff"}, | ||
doc = "The truth sensitivity level at which to start filtering INDELs", | ||
optional = true | ||
) | ||
|
@@ -117,13 +117,15 @@ public class ExtractCohort extends ExtractTool { | |
@Advanced | ||
@Argument( | ||
fullName = "snps-lod-score-cutoff", | ||
mutex = {"snps-truth-sensitivity-filter-level"}, | ||
doc = "The VQSLOD score below which to start filtering SNPs", | ||
optional = true) | ||
private Double vqsLodSNPThreshold = null; | ||
|
||
@Advanced | ||
@Argument( | ||
fullName = "indels-lod-score-cutoff", | ||
mutex = {"indels-truth-sensitivity-filter-level"}, | ||
doc = "The VQSLOD score below which to start filtering INDELs", | ||
optional = true) | ||
private Double vqsLodINDELThreshold = null; | ||
|
@@ -143,30 +145,41 @@ public class ExtractCohort extends ExtractTool { | |
protected void onStartup() { | ||
super.onStartup(); | ||
|
||
if ( (filterSetInfoTableName != null || filterSetSiteTableName != null) && (filterSetName == null || filterSetName.equals(""))) { | ||
throw new UserException("--filter-set-name must be specified if any filtering related operations are requested"); | ||
Set<VCFHeaderLine> extraHeaderLines = new HashSet<>(); | ||
|
||
if (filterSetInfoTableName != null) { // filter using vqslod-- default to GENOTYPE unless SITES specifically selected | ||
vqslodfilteringType = performSiteSpecificVQSLODFiltering ? VQSLODFilteringType.SITES : VQSLODFilteringType.GENOTYPE; | ||
} | ||
|
||
Set<VCFHeaderLine> extraHeaderLines = new HashSet<>(); | ||
if (filterSetInfoTableName != null) { | ||
FilterSensitivityTools.validateFilteringCutoffs(truthSensitivitySNPThreshold, truthSensitivityINDELThreshold, vqsLodSNPThreshold, vqsLodINDELThreshold, tranchesTableName); | ||
Map<String, Map<Double, Double>> trancheMaps = FilterSensitivityTools.getTrancheMaps(filterSetName, tranchesTableName, projectID); | ||
|
||
if (vqsLodSNPThreshold != null) { // we already have vqslod thresholds directly | ||
extraHeaderLines.add(FilterSensitivityTools.getVqsLodHeader(vqsLodSNPThreshold, GATKVCFConstants.SNP)); | ||
extraHeaderLines.add(FilterSensitivityTools.getVqsLodHeader(vqsLodINDELThreshold, GATKVCFConstants.INDEL)); | ||
} else { // using sensitivity threshold inputs; need to convert these to vqslod thresholds | ||
vqsLodSNPThreshold = FilterSensitivityTools.getVqslodThreshold(trancheMaps.get(GATKVCFConstants.SNP), truthSensitivitySNPThreshold, GATKVCFConstants.SNP); | ||
vqsLodINDELThreshold = FilterSensitivityTools.getVqslodThreshold(trancheMaps.get(GATKVCFConstants.INDEL), truthSensitivityINDELThreshold, GATKVCFConstants.INDEL); | ||
// set headers | ||
extraHeaderLines.add(FilterSensitivityTools.getTruthSensitivityHeader(truthSensitivitySNPThreshold, vqsLodSNPThreshold, GATKVCFConstants.SNP)); | ||
extraHeaderLines.add(FilterSensitivityTools.getTruthSensitivityHeader(truthSensitivityINDELThreshold, vqsLodINDELThreshold, GATKVCFConstants.INDEL)); | ||
} | ||
// filter at a site level (but not necesarily use vqslod) | ||
if ((filterSetSiteTableName != null && filterSetName == null) || (filterSetSiteTableName == null && filterSetName != null)) { | ||
throw new UserException("--filter-set-name and --filter-set-site-table are both necessary for any filtering related operations"); | ||
} | ||
if (!vqslodfilteringType.equals(VQSLODFilteringType.NONE)) { | ||
if (filterSetInfoTableName == null || filterSetSiteTableName == null || filterSetName == null) { | ||
throw new UserException(" --filter-set-site-table, --filter-set-name and --filter-set-site-table are all necessary for any vqslod filtering operations"); | ||
} | ||
} | ||
|
||
if (!vqslodfilteringType.equals(VQSLODFilteringType.NONE)) { | ||
FilterSensitivityTools.validateFilteringCutoffs(truthSensitivitySNPThreshold, truthSensitivityINDELThreshold, vqsLodSNPThreshold, vqsLodINDELThreshold, tranchesTableName); | ||
Map<String, Map<Double, Double>> trancheMaps = FilterSensitivityTools.getTrancheMaps(filterSetName, tranchesTableName, projectID); | ||
|
||
if (vqsLodSNPThreshold != null) { // we already have vqslod thresholds directly | ||
extraHeaderLines.add(FilterSensitivityTools.getVqsLodHeader(vqsLodSNPThreshold, GATKVCFConstants.SNP)); | ||
extraHeaderLines.add(FilterSensitivityTools.getVqsLodHeader(vqsLodINDELThreshold, GATKVCFConstants.INDEL)); | ||
} else { // using sensitivity threshold inputs; need to convert these to vqslod thresholds | ||
vqsLodSNPThreshold = FilterSensitivityTools.getVqslodThreshold(trancheMaps.get(GATKVCFConstants.SNP), truthSensitivitySNPThreshold, GATKVCFConstants.SNP); | ||
vqsLodINDELThreshold = FilterSensitivityTools.getVqslodThreshold(trancheMaps.get(GATKVCFConstants.INDEL), truthSensitivityINDELThreshold, GATKVCFConstants.INDEL); | ||
// set headers | ||
extraHeaderLines.add(FilterSensitivityTools.getTruthSensitivityHeader(truthSensitivitySNPThreshold, vqsLodSNPThreshold, GATKVCFConstants.SNP)); | ||
extraHeaderLines.add(FilterSensitivityTools.getTruthSensitivityHeader(truthSensitivityINDELThreshold, vqsLodINDELThreshold, GATKVCFConstants.INDEL)); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe not for this PR, but I'm wondering if we should make FilterSensitivityTools a class that we create instances of. We would pass it all the params on construction, and it would do the validation. It could also add the headers, and then we would just ask it for the 2 thresholds so that all the logic is encapsulated. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. true---it does seem like a lot of params to be passing around |
||
} | ||
|
||
extraHeaderLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME)); | ||
|
||
if (performGenotypeVQSLODFiltering) { | ||
if (vqslodfilteringType.equals(VQSLODFilteringType.GENOTYPE)) { | ||
extraHeaderLines.add(new VCFFormatHeaderLine("FT", 1, VCFHeaderLineType.String, "Genotype Filter Field")); | ||
} | ||
|
||
|
@@ -223,8 +236,7 @@ protected void onStartup() { | |
progressMeter, | ||
filterSetName, | ||
emitPLs, | ||
disableGnarlyGenotyper, | ||
performGenotypeVQSLODFiltering, | ||
vqslodfilteringType, | ||
excludeFilteredSites); | ||
|
||
vcfWriter.writeHeader(header); | ||
|
@@ -236,7 +248,7 @@ public void traverse() { | |
progressMeter.setRecordsBetweenTimeChecks(100L); | ||
|
||
if ( filterSetInfoTableName == null || filterSetInfoTableName.equals("") ) { | ||
logger.warn("--variant-filter-table is not specified, no filtering of cohort! "); | ||
logger.warn("--filter-set-info-table is not specified, no filtering of cohort! "); | ||
} | ||
|
||
engine.traverse(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nice docs