From 17bc5bf1b7af5bffb92203de484df36b3b414e9a Mon Sep 17 00:00:00 2001
From: Andrey Smirnov
Date: Thu, 16 Sep 2021 15:49:00 -0400
Subject: [PATCH] Addressed PR comments. GermlineCNVCaller will now throw an
exception if any arguments not applicable in CASE mode were set by the user.
---
.../tools/copynumber/GermlineCNVCaller.java | 19 +++++++++++-------
.../GermlineCallingArgumentCollection.java | 14 ++++++++++++-
...mlineDenoisingModelArgumentCollection.java | 20 ++++++++++++++++++-
.../GermlineCNVCallerIntegrationTest.java | 17 ++++++++++++++++
4 files changed, 61 insertions(+), 9 deletions(-)
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java
index d1476a1ab3c..b5e3c911b35 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java
@@ -5,6 +5,7 @@
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
+import org.broadinstitute.barclay.argparser.CommandLineArgumentParser;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection;
@@ -76,6 +77,9 @@
* diverges the second time we suggest checking if input count or karyotype values or other inputs are abnormal
* (an example of abnormality is a count file containing mostly zeros).
*
+ * More details about the model and inference procedure can be found in the white paper
+ * https://github.com/broadinstitute/gatk/blob/master/docs/CNV/germline-cnv-caller-model.pdf
+ *
* The computation done by this tool, aside from input data parsing and validation, is performed outside of the Java
@@ -121,11 +125,11 @@
*
The tool will be run in CASE mode using the argument {@code run-mode CASE}. The path to a previously
* obtained model directory must be provided via the {@code model} argument in this mode. The modeled intervals are
- * then specified by a file contained in the model directory,and all model intervals must be present in all of the
- * input count files. All interval-related arguments (e.g. {@code interval-psi-scale} argument) are ignored in this
- * mode, however an advanced user can adjust various sample-related (e.g. {@code sample-psi-scale}) and global
- * (e.g. {@code p_alt}) arguments for custom applications of the tool. Inference-related (e.g.
- * {@code min_training_epochs}) arguments can be adjusted as well. The tool output in CASE mode is only the "-calls"
+ * then specified by a file contained in the model directory, and all model intervals must be present in all of the
+ * input count files. All interval-related arguments (e.g. {@code interval-psi-scale}) are ignored in this
+ * mode. However an advanced user can adjust various sample-related (e.g. {@code sample-psi-scale}) and global
+ * (e.g. {@code p_alt}) arguments for custom applications of the tool. Inference-related arguments (e.g.
+ * {@code min_training_epochs}) can be adjusted as well. The tool output in CASE mode is only the "-calls"
* subdirectory and is organized similarly to that in COHORT mode.
*
* Note that at the moment, this tool does not automatically verify the compatibility of the provided parametrization
@@ -362,8 +366,9 @@ protected Object doWork() {
}
private void validateArguments() {
- germlineCallingArgumentCollection.validate();
- germlineDenoisingModelArgumentCollection.validate();
+ final CommandLineArgumentParser clpParser = (CommandLineArgumentParser) getCommandLineParser();
+ germlineCallingArgumentCollection.validate(clpParser, runMode);
+ germlineDenoisingModelArgumentCollection.validate(clpParser, runMode);
germlineCNVHybridADVIArgumentCollection.validate();
Utils.validateArg(inputReadCountPaths.size() == new HashSet<>(inputReadCountPaths).size(),
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java
index cffad07d934..df7383ece64 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java
@@ -1,7 +1,10 @@
package org.broadinstitute.hellbender.tools.copynumber.arguments;
+import com.google.common.collect.ImmutableList;
import org.broadinstitute.barclay.argparser.Argument;
+import org.broadinstitute.barclay.argparser.CommandLineArgumentParser;
import org.broadinstitute.hellbender.tools.copynumber.GermlineCNVCaller;
+import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.param.ParamUtils;
import java.io.Serializable;
@@ -21,6 +24,11 @@ public final class GermlineCallingArgumentCollection implements Serializable {
public static final String CLASS_COHERENCE_LENGTH_LONG_NAME = "class-coherence-length";
public static final String MAX_COPY_NUMBER_LONG_NAME = "max-copy-number";
+ // these model parameters will be extracted from provided model in CASE mode
+ private static final List HIDDEN_ARGS_CASE_MODE = ImmutableList.of(
+ P_ALT_LONG_NAME,
+ CLASS_COHERENCE_LENGTH_LONG_NAME);
+
@Argument(
doc = "Total prior probability of alternative copy-number states (the reference copy-number " +
"is set to the contig integer ploidy)",
@@ -78,7 +86,11 @@ public List generatePythonArguments(final GermlineCNVCaller.RunMode runM
return arguments;
}
- public void validate() {
+ public void validate(final CommandLineArgumentParser clpParser, final GermlineCNVCaller.RunMode runMode) {
+ if (runMode == GermlineCNVCaller.RunMode.CASE)
+ HIDDEN_ARGS_CASE_MODE.forEach(a -> Utils.validateArg(
+ !clpParser.getNamedArgumentDefinitionByAlias(a).getHasBeenSet(),
+ String.format("Argument '--%s' cannot be set in the CASE mode.", a)));
ParamUtils.isPositive(cnvCoherenceLength,
String.format("Coherence length of CNV events (%s) must be positive.",
CNV_COHERENCE_LENGTH_LONG_NAME));
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java
index 2c00f60bc14..207dc131e8f 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java
@@ -1,8 +1,12 @@
package org.broadinstitute.hellbender.tools.copynumber.arguments;
+import com.google.common.collect.ImmutableList;
import org.broadinstitute.barclay.argparser.Argument;
+import org.broadinstitute.barclay.argparser.CommandLineArgumentParser;
+import org.broadinstitute.hellbender.engine.filters.ReadFilter;
import org.broadinstitute.hellbender.tools.copynumber.GermlineCNVCaller;
import org.broadinstitute.hellbender.utils.param.ParamUtils;
+import org.broadinstitute.hellbender.utils.Utils;
import java.io.Serializable;
import java.util.ArrayList;
@@ -28,6 +32,16 @@ public final class GermlineDenoisingModelArgumentCollection implements Serializa
public static final String ENABLE_BIAS_FACTORS_LONG_NAME = "enable-bias-factors";
public static final String ACTIVE_CLASS_PADDING_HYBRID_MODE_LONG_NAME = "active-class-padding-hybrid-mode";
+ // these model parameters will be extracted from provided model in CASE mode
+ private static final List HIDDEN_ARGS_CASE_MODE = ImmutableList.of(
+ MAX_BIAS_FACTORS_LONG_NAME,
+ INTERVAL_PSI_SCALE_LONG_NAME,
+ LOG_MEAN_BIAS_STANDARD_DEVIATION_LONG_NAME,
+ INIT_ARD_REL_UNEXPLAINED_VARIANCE_LONG_NAME,
+ ENABLE_BIAS_FACTORS_LONG_NAME,
+ NUM_GC_BINS_LONG_NAME,
+ GC_CURVE_STANDARD_DEVIATION_LONG_NAME);
+
public enum CopyNumberPosteriorExpectationMode {
MAP("map"),
EXACT("exact"),
@@ -168,7 +182,11 @@ public List generatePythonArguments(final GermlineCNVCaller.RunMode runM
return arguments;
}
- public void validate() {
+ public void validate(final CommandLineArgumentParser clpParser, final GermlineCNVCaller.RunMode runMode) {
+ if (runMode == GermlineCNVCaller.RunMode.CASE)
+ HIDDEN_ARGS_CASE_MODE.forEach(a -> Utils.validateArg(
+ !clpParser.getNamedArgumentDefinitionByAlias(a).getHasBeenSet(),
+ String.format("Argument '--%s' cannot be set in the CASE mode.", a)));
ParamUtils.isPositive(maxBiasFactors,
String.format("Maximum number of bias factors (%s) must be positive.",
MAX_BIAS_FACTORS_LONG_NAME));
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java
index 0a7b73a0f41..e24d145d4c1 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java
@@ -5,6 +5,7 @@
import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberStandardArgument;
+import org.broadinstitute.hellbender.tools.copynumber.arguments.GermlineDenoisingModelArgumentCollection;
import org.broadinstitute.hellbender.utils.IntervalMergingRule;
import org.testng.annotations.Test;
@@ -71,6 +72,22 @@ public void testCaseWithoutModel() {
runCommandLine(argsBuilder);
}
+ @Test(groups = {"python"}, expectedExceptions = IllegalArgumentException.class)
+ public void testCaseWithHiddenArguments() {
+ final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
+ Arrays.stream(TEST_COUNT_FILES, 0, 5).forEach(argsBuilder::addInput);
+ argsBuilder.add(GermlineCNVCaller.RUN_MODE_LONG_NAME, GermlineCNVCaller.RunMode.CASE.name())
+ .add(GermlineCNVCaller.CONTIG_PLOIDY_CALLS_DIRECTORY_LONG_NAME,
+ CONTIG_PLOIDY_CALLS_OUTPUT_DIR.getAbsolutePath())
+ .add(CopyNumberStandardArgument.MODEL_LONG_NAME,
+ new File(OUTPUT_DIR, "test-germline-cnv-cohort-model").getAbsolutePath())
+ .add(StandardArgumentDefinitions.OUTPUT_LONG_NAME, OUTPUT_DIR.getAbsolutePath())
+ .add(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-germline-cnv-case");
+ // add argument that is not applicable in CASE mode
+ argsBuilder.add(GermlineDenoisingModelArgumentCollection.INTERVAL_PSI_SCALE_LONG_NAME, 0.1);
+ runCommandLine(argsBuilder);
+ }
+
@Test(groups = {"python"}, enabled = false)
public void testCohortWithInputModel() {
}