Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VS-695. Updates to run Precision and Sensitivity on VQSR Lite #8230

Merged
merged 4 commits into from
Mar 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ workflows:
branches:
- master
- ah_var_store
- gg_VS-695_RunPandSForVQSR_Lite
- name: GvsPopulateAltAllele
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsPopulateAltAllele.wdl
Expand All @@ -118,6 +119,7 @@ workflows:
branches:
- master
- ah_var_store
- gg_VS-695_RunPandSForVQSR_Lite
- name: GvsImportGenomes
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsImportGenomes.wdl
Expand Down Expand Up @@ -202,6 +204,7 @@ workflows:
branches:
- master
- ah_var_store
- gg_VS-695_RunPandSForVQSR_Lite
- name: GvsQuickstartVcfIntegration
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsQuickstartVcfIntegration.wdl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ task EvaluateVcf {
~{if all_records then "--all-records" else ""} \
--roc-subset snp,indel \
--vcf-score-field=INFO.~{max_score_field_tag} \
~{if use_classic_VQSR then "--sort-order descending" else "--sort-order ascending"} \
-t human_REF_SDF \
-b ~{truth_vcf} \
-e ~{truth_bed}\
Expand Down
12 changes: 6 additions & 6 deletions scripts/variantstore/wdl/GvsCreateFilterSet.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ workflow GvsCreateFilterSet {
String fq_filter_sites_destination_table = "~{project_id}.~{dataset_name}.filter_set_sites"

String fq_info_destination_table_schema = "filter_set_name:string,type:string,location:integer,ref:string,alt:string,vqslod:float,culprit:string,training_label:string,yng_status:string"
String fq_info_destination_table_vqsr_lite_schema = "filter_set_name:string,type:string,location:integer,ref:string,alt:string,calibration_sensitivity:float,culprit:string,training_label:string,yng_status:string"
String fq_info_destination_table_vqsr_lite_schema = "filter_set_name:string,type:string,location:integer,ref:string,alt:string,calibration_sensitivity:float,score:float,training_label:string,yng_status:string"

call Utils.GetBQTableLastModifiedDatetime as SamplesTableDatetimeCheck {
input:
Expand Down Expand Up @@ -181,7 +181,7 @@ workflow GvsCreateFilterSet {
output_basename = "${filter_set_name}.filtered.scored.indels"
}

call PopulateFilterSetInfo {
call PopulateFilterSetInfo {
input:
gatk_override = gatk_override,
filter_set_name = filter_set_name,
Expand Down Expand Up @@ -425,7 +425,7 @@ task ExtractFilterTask {
>>>

runtime {
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_02_15_7274e012706cb2fa15ed3fb1e12d7e9ae28aa4a1"
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_03_01_b01183576153cf000e17dea32144d332cb7b79a9"
memory: "7 GB"
disks: "local-disk 10 HDD"
bootDiskSizeGb: 15
Expand Down Expand Up @@ -506,7 +506,7 @@ task PopulateFilterSetInfo {
>>>

runtime {
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_02_15_7274e012706cb2fa15ed3fb1e12d7e9ae28aa4a1"
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_03_01_b01183576153cf000e17dea32144d332cb7b79a9"
memory: "3500 MB"
disks: "local-disk 250 HDD"
bootDiskSizeGb: 15
Expand Down Expand Up @@ -562,7 +562,7 @@ task PopulateFilterSetSites {
>>>

runtime {
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_02_15_7274e012706cb2fa15ed3fb1e12d7e9ae28aa4a1"
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_03_01_b01183576153cf000e17dea32144d332cb7b79a9"
memory: "3500 MB"
disks: "local-disk 200 HDD"
bootDiskSizeGb: 15
Expand Down Expand Up @@ -609,7 +609,7 @@ task PopulateFilterSetTranches {
>>>

runtime {
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_02_15_7274e012706cb2fa15ed3fb1e12d7e9ae28aa4a1"
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_03_01_b01183576153cf000e17dea32144d332cb7b79a9"
memory: "3500 MB"
disks: "local-disk 200 HDD"
bootDiskSizeGb: 15
Expand Down
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/GvsExtractCallset.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ task ExtractTask {
echo ~{interval_index},${OUTPUT_FILE_DEST},${OUTPUT_FILE_BYTES},${OUTPUT_FILE_INDEX_DEST},${OUTPUT_FILE_INDEX_BYTES} >> manifest.txt
>>>
runtime {
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_02_15_7274e012706cb2fa15ed3fb1e12d7e9ae28aa4a1"
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_03_01_b01183576153cf000e17dea32144d332cb7b79a9"
memory: "12 GB"
disks: "local-disk 150 HDD"
bootDiskSizeGb: 15
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public class SchemaUtils {
public static final String FILTER_SET_NAME = "filter_set_name";
public static final String VQSLOD = "vqslod";
public static final String CALIBRATION_SENSITIVITY = "calibration_sensitivity";
public static final String SCORE = "score";
public static final String YNG_STATUS = "yng_status";

//Tranches table
Expand All @@ -65,7 +66,7 @@ public class SchemaUtils {

public static final List<String> SAMPLE_FIELDS = Arrays.asList(SchemaUtils.SAMPLE_NAME_FIELD_NAME, SchemaUtils.SAMPLE_ID_FIELD_NAME);
public static final List<String> YNG_FIELDS = Arrays.asList(FILTER_SET_NAME, LOCATION_FIELD_NAME, REF_ALLELE_FIELD_NAME, ALT_ALLELE_FIELD_NAME, VQSLOD, YNG_STATUS);
public static final List<String> VQSLITE_YNG_FIELDS = Arrays.asList(FILTER_SET_NAME, LOCATION_FIELD_NAME, REF_ALLELE_FIELD_NAME, ALT_ALLELE_FIELD_NAME, CALIBRATION_SENSITIVITY, YNG_STATUS);
public static final List<String> VQSLITE_YNG_FIELDS = Arrays.asList(FILTER_SET_NAME, LOCATION_FIELD_NAME, REF_ALLELE_FIELD_NAME, ALT_ALLELE_FIELD_NAME, CALIBRATION_SENSITIVITY, SCORE, YNG_STATUS);
public static final List<String> TRANCHE_FIELDS = Arrays.asList(TARGET_TRUTH_SENSITIVITY, MIN_VQSLOD, TRANCHE_FILTER_NAME, TRANCHE_MODEL);

public static final List<String> ALT_ALLELE_FIELDS = Arrays.asList(LOCATION_FIELD_NAME, SAMPLE_ID_FIELD_NAME, REF_ALLELE_FIELD_NAME, "allele", ALT_ALLELE_FIELD_NAME, "allele_pos", CALL_GT, AS_RAW_MQ, RAW_MQ, AS_RAW_MQRankSum, "raw_mqranksum_x_10", AS_QUALapprox, "qual", AS_RAW_ReadPosRankSum, "raw_readposranksum_x_10", AS_SB_TABLE, "SB_REF_PLUS","SB_REF_MINUS","SB_ALT_PLUS","SB_ALT_MINUS", CALL_AD, "ref_ad", "ad");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,14 @@ public enum SensitivityFilteringType { GENOTYPE, SITES, NONE }

@Argument(
fullName ="snps-truth-sensitivity-filter-level",
doc = "The truth sensitivity level at which to start filtering SNPs",
doc = "The truth sensitivity level above which to start filtering SNPs",
gbggrant marked this conversation as resolved.
Show resolved Hide resolved
optional = true
)
private Double truthSensitivitySNPThreshold = FilterSensitivityTools.DEFAULT_TRUTH_SENSITIVITY_THRESHOLD_SNPS / 100;

@Argument(
fullName = "indels-truth-sensitivity-filter-level",
doc = "The truth sensitivity level at which to start filtering INDELs",
doc = "The truth sensitivity level above which to start filtering INDELs",
optional = true
)
private Double truthSensitivityINDELThreshold = FilterSensitivityTools.DEFAULT_TRUTH_SENSITIVITY_THRESHOLD_INDELS / 100;
Expand Down Expand Up @@ -224,6 +224,7 @@ protected static VCFHeader generateVcfHeader(Set<String> sampleNames,
);
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_VQS_SENS_KEY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_VQS_SCORE_KEY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_YNG_STATUS_KEY));


Expand All @@ -247,6 +248,12 @@ protected String[] customCommandLineValidation() {
errors.add("Parameters 'project-id', 'dataset-id', 'call-set-identifier', 'wdl-step', 'wdl-call', and 'shardIdentifier' must be set if 'cost-observability-tablename' is set.");
}
}
if (truthSensitivitySNPThreshold < 0.0 || truthSensitivitySNPThreshold > 1.0) {
errors.add("Parameter 'snps-truth-sensitivity-filter-level' MUST be between 0.0 and 1.0 NOT: " + truthSensitivitySNPThreshold);
}
if (truthSensitivityINDELThreshold < 0.0 || truthSensitivityINDELThreshold > 1.0) {
errors.add("Parameter 'indels-truth-sensitivity-filter-level' MUST be between 0.0 and 1.0 NOT: " + truthSensitivityINDELThreshold);
}
if (!errors.isEmpty()) {
return errors.toArray(new String[0]);
}
Expand Down Expand Up @@ -274,7 +281,8 @@ protected void onStartup() {
}

if (!sensitivityFilteringType.equals(SensitivityFilteringType.NONE)) {
// TODO - put a validation that sensitivity between 0 and 1
logger.info("Passing all SNP variants with VQSLOD >= " + truthSensitivitySNPThreshold);
logger.info("Passing all INDEL variants with VQSLOD >= " + truthSensitivityINDELThreshold);

extraHeaderLines.add(new VCFFilterHeaderLine(GATKVCFConstants.VQS_SENS_FAILURE_SNP,
"Site failed SNP model calibration sensitivity cutoff (" + truthSensitivitySNPThreshold.toString() + ")"));
Expand Down
Loading