-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathseqr_loader.toml
114 lines (89 loc) · 3.47 KB
/
seqr_loader.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Top-level settings for the seqr_loader workflow.
[workflow]
name = 'seqr_loader'
status_reporter = 'metamist'
# Use GnarlyGenotyper instead of GenotypeGVCFs
use_gnarly = false
# Use allele-specific annotations for VQSR
use_as_vqsr = true
# Version of VEP to use, currently operational: 105, 110
vep_version = '110'
# Realign CRAM when available, instead of using FASTQ.
# The parameter value should correspond to the CRAM version
# (e.g. v0 in gs://cpg-fewgenomes-main/cram/v0/CPGaaa.cram).
#realign_from_cram_version = 'v0'
# Write the sorted BAM file to the temp bucket after alignment and before MarkDuplicates
checkpoint_sorted_bam = false
# Calling intervals (defaults to whole genome intervals)
#intervals_path =
# Write dataset MTs for these datasets. Required for the AnnotateDataset stage
# write_mt_for_datasets = []
# Create Seqr ElasticSearch indices for these datasets. Required for the MtToEs stage.
# create_es_index_for_datasets = []
# NOTE(review): undocumented key; presumably the datasets for which a VCF is
# written out - confirm against the consuming stage.
write_vcf = ["udn-aus"]
# Add in specific multiQC report config options
# See https://multiqc.info/docs/getting_started/config for more details
# [workflow.cram_multiqc]
# send_to_slack = true
# [workflow.cram_multiqc.extra_config]
# plots_force_interactive = true
# [workflow.gvcf_multiqc]
# send_to_slack = true
# [workflow.gvcf_multiqc.extra_config]
# plots_force_interactive = true
# By default, run the ES-index generating jobs on spot (preemptible) instances.
# NOTE(review): presumably set this to false to use non-preemptible instances - confirm.
[workflow.es_index]
spot_instance = true
[resource_overrides]
# Override default resource requirements for unusually large seq data without
# demanding higher resources for all operations as standard. Examples below.
# Every override in this table is currently commented out; uncomment one to apply it.
# picard MarkDuplicates overrides for unreasonably large sequencing groups
#picard_mem_gb = 100
#picard_storage_gb = 350
# haplotype caller overrides, see production-pipelines PR#381
# defaults in code are 40 for genomes, none for exomes
#haplotypecaller_storage = 80
# JointGenotyping GenomicsDBImport job overrides
# genomicsdb_import_mem_gb = 32
# genomicsdb_import_use_highmem = false
# JointGenotyping GenotypeGVCFs job overrides
# genotype_gvcfs_mem_gb = 15
# genotype_gvcfs_use_highmem = false
[vqsr]
# When applying the model, VQSR targets the indel_filter_level and
# snp_filter_level sensitivities. The tool matches them internally to a VQSLOD
# score cutoff based on the model's estimated sensitivity to a set of true variants.
snp_filter_level = 99.7
indel_filter_level = 99.0
# Settings for the CRAM QC jobs.
[cramqc]
# NOTE(review): presumably tells the QC tools that input reads are already
# coordinate-sorted - confirm which tool consumes this flag.
assume_sorted = true
# NOTE(review): presumably the number of principal components used by a QC
# tool - confirm against the consuming job.
num_pcs = 4
# Lower bounds for genome QC metrics.
# NOTE(review): 0.80 reads as a fraction while 25 (in the max table) reads as a
# percentage - confirm the units each metric is reported in before editing.
[qc_thresholds.genome.min]
"MEDIAN_COVERAGE" = 10
"PCT_PF_READS_ALIGNED" = 0.80
# Upper bounds for genome QC metrics.
[qc_thresholds.genome.max]
"FREEMIX" = 0.04
"PERCENT_DUPLICATION" = 25
# Hail settings for this workflow.
# NOTE(review): presumably Hail Batch settings; pool_label looks like it selects
# a dedicated worker pool - confirm against the consumer.
[hail]
pool_label = 'seqr'
billing_project = 'seqr'
# Slack notification settings.
[slack]
channel = 'workflows-qc'
# NOTE(review): presumably the name of the secret holding the Slack token and
# the project that stores it - confirm.
token_secret_id = 'slack-seqr-loader-token'
token_project_id = 'seqr-308602'
[elasticsearch]
# Configure access to ElasticSearch server
# NOTE(review): port is stored as a string, not an integer; confirm the
# consumer expects a string before changing its type.
port = '9243'
host = 'elasticsearch.es.australia-southeast1.gcp.elastic-cloud.com'
username = 'seqr'
# Load ElasticSearch password from a secret, unless SEQR_ES_PASSWORD is set
password_secret_id = 'seqr-es-password'
password_project_id = 'seqr-308602'
# Temporary overrides until we rename images/config.
# NOTE(review): only the VEP 105 image is set here while [workflow] selects
# vep_version '110'; presumably the 110 image comes from a default config - confirm.
[images]
vep_105 = "australia-southeast1-docker.pkg.dev/cpg-common/images/vep:105.0"
# Reference data locations (GCS paths) used by the workflow.
[references]
# VEP mount paths, one per VEP version.
vep_105_mount = "gs://cpg-common-main/references/vep/105.0/mount"
vep_110_mount = "gs://cpg-common-main/references/vep/110/mount"
[references.hg38_telomeres_and_centromeres_intervals]
# Derived from hg38.telomeresAndMergedCentromeres.bed used in gnomAD v3
interval_list = "gs://cpg-common-main/references/hg38/v0/hg38.telomeresAndMergedCentromeres.interval_list"