-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDenovoGearPostProcessing.wdl
156 lines (113 loc) · 3.65 KB
/
DenovoGearPostProcessing.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
## Post-processes DenovoGear (DNG) caller output in five chained steps:
##   1. CombineDenovoGearOutput - concatenate the input DNG files into one file.
##   2. NumericGenotype         - run the supplied Python script over the combined file.
##   3. SelectDNMGenotype       - keep only rows matching the de novo genotype pattern.
##   4. SplitSnpIndel           - split the kept rows into SNP and INDEL files.
##   5. ListOfDNMs              - build de-duplicated lists for all DNMs, SNPs and INDELs.
workflow DenovoGearPostProcessing {
  # DenovoGear output files to be merged (one per chromosome, per the task comment).
  Array[File] DNGOutputFiles_array

  # Python script executed by NumericGenotype. It must be declared at workflow
  # level so that the `python_file=python_file` call input below can resolve.
  File python_file

  call CombineDenovoGearOutput {
    input:
      DNGOutputFiles=DNGOutputFiles_array
  }

  call NumericGenotype {
    input:
      python_file=python_file,
      DNG_file=CombineDenovoGearOutput.CombinedDNGOutput
  }

  call SelectDNMGenotype {
    input:
      Numeric_Genotype_input=NumericGenotype.DenovoGear_NumericGenotype_output
  }

  call SplitSnpIndel {
    input:
      Combined_DNG_Numeric_Genotype_file=SelectDNMGenotype.DenovoGear_DNM_Genotype_output
  }

  # The combined list is built from the unsplit DNM file; the SNP and INDEL
  # lists are built from the SplitSnpIndel outputs.
  call ListOfDNMs {
    input:
      DenovoGear_DNMs_file=SelectDNMGenotype.DenovoGear_DNM_Genotype_output,
      DenovoGear_snp_file=SplitSnpIndel.DenovoGear_snp_file,
      DenovoGear_indel_file=SplitSnpIndel.DenovoGear_indel_file
  }
}
## Concatenates every DenovoGear output file (produced chromosome-wise by the
## caller) into a single file, in the order the files appear in the input array.
##
## Input:  DNGOutputFiles    - the DenovoGear output files to merge.
## Output: CombinedDNGOutput - "combinedoutput.DNG.txt", the concatenation.
task CombineDenovoGearOutput {
  Array[File] DNGOutputFiles

  command {
    cat ${sep=" " DNGOutputFiles} > combinedoutput.DNG.txt
  }

  output {
    File CombinedDNGOutput = "combinedoutput.DNG.txt"
  }

  # NOTE(review): Cromwell's Google backend documents the disks value as
  # "local-disk SIZE TYPE"; confirm the bare "local-disk" is accepted by the
  # backend this workflow targets.
  runtime {
    cpu: 1
    memory: "2GB"
    disks: "local-disk"
    docker: "ubuntu:18.04"
  }
}
## Runs the supplied Python script on the combined DenovoGear file and captures
## the script's standard output.
## Per the original author's note, the script rewrites allele-letter genotypes
## in numeric form relative to the reference (e.g. C/T becomes 0/1); the script
## itself is not part of this file, so verify against it.
##
## Inputs: python_file - script to run; DNG_file is passed as its only argument.
##         DNG_file    - combined DenovoGear output.
## Output: DenovoGear_NumericGenotype_output - "DenovoGear_NumericGenotype.txt".
task NumericGenotype {
  File python_file
  File DNG_file

  command {
    python ${python_file} ${DNG_file} > DenovoGear_NumericGenotype.txt
  }

  output {
    File DenovoGear_NumericGenotype_output = "DenovoGear_NumericGenotype.txt"
  }

  # The image pins Python 2.7, so python_file presumably targets Python 2.
  # NOTE(review): Cromwell's Google backend documents the disks value as
  # "local-disk SIZE TYPE"; confirm the bare "local-disk" is accepted by the
  # backend this workflow targets.
  runtime {
    cpu: 2
    memory: "4GB"
    disks: "local-disk"
    docker: "python:2.7.18-stretch"
  }
}
## Keeps only the rows whose genotypes match the de novo mutation pattern:
## child heterozygous alternate (0/1) and both parents homozygous reference (0/0).
## The filter tests whitespace-separated columns 47, 49 and 51; these are
## presumed to hold the child's and the two parents' genotypes - confirm
## against the NumericGenotype output layout.
##
## Input:  Numeric_Genotype_input         - numeric-genotype DenovoGear table.
## Output: DenovoGear_DNM_Genotype_output - rows that passed the filter.
task SelectDNMGenotype {
  File Numeric_Genotype_input

  # A bare awk pattern prints each matching line, which is what the
  # explicit `{ if (...) { print } }` form does.
  command <<<
    awk '$47 == "0/1" && $49 == "0/0" && $51 == "0/0"' ${Numeric_Genotype_input} > DenovoGear_DNM_Genotype_output.txt
  >>>

  output {
    File DenovoGear_DNM_Genotype_output = "DenovoGear_DNM_Genotype_output.txt"
  }

  # NOTE(review): Cromwell's Google backend documents the disks value as
  # "local-disk SIZE TYPE"; confirm the bare "local-disk" is accepted by the
  # backend this workflow targets.
  runtime {
    cpu: 1
    memory: "1GB"
    disks: "local-disk"
    docker: "ubuntu:18.04"
  }
}
## Splits the filtered de novo mutation table into two files: rows containing
## "DENOVO-SNP" and rows containing "DENOVO-INDEL".
##
## Input:   Combined_DNG_Numeric_Genotype_file - filtered DNM table.
## Outputs: DenovoGear_snp_file   - "DenovoGear_snp_file.txt"
##          DenovoGear_indel_file - "DenovoGear_indel_file.txt"
##
## The two greps are independent, so they run as separate commands rather than
## being joined with a pipe (a pipe between them carries no data and hides the
## first command's exit status).
## grep exits with status 1 when no line matches; that is accepted here so a
## sample with no SNPs or no INDELs yields an empty file instead of a failed
## task. Any other non-zero status (a real error) still fails the task.
task SplitSnpIndel {
  File Combined_DNG_Numeric_Genotype_file

  command {
    set -e
    grep "DENOVO-SNP" ${Combined_DNG_Numeric_Genotype_file} > DenovoGear_snp_file.txt || [ $? -eq 1 ]
    grep "DENOVO-INDEL" ${Combined_DNG_Numeric_Genotype_file} > DenovoGear_indel_file.txt || [ $? -eq 1 ]
  }

  # NOTE(review): Cromwell's Google backend documents the disks value as
  # "local-disk SIZE TYPE"; confirm the bare "local-disk" is accepted by the
  # backend this workflow targets.
  runtime {
    docker: "ubuntu:18.04"
    memory: "1GB"
    cpu: 1
    disks: "local-disk"
  }

  output {
    File DenovoGear_snp_file = "DenovoGear_snp_file.txt"
    File DenovoGear_indel_file = "DenovoGear_indel_file.txt"
  }
}
## Builds sorted, de-duplicated lists from the all-DNM, SNP-only and INDEL-only
## tables. For each input, space-delimited fields 5 and 7 are extracted and
## joined with "|" (per the original note these are chromosome and position -
## confirm against the DenovoGear column layout).
##
## Inputs:  DenovoGear_DNMs_file  - all filtered de novo mutations.
##          DenovoGear_snp_file   - SNP subset.
##          DenovoGear_indel_file - INDEL subset.
## Outputs: DenovoGear_DNMs_file_output      - "DenovoGear_listof_DNMs_file.txt"
##          DenovoGear_list_of_snps_output   - "DenovoGear_listof_snps_file.txt"
##          DenovoGear_list_of_indels_output - "DenovoGear_listof_indels_file.txt"
##
## The three pipelines are independent, so each runs as its own command rather
## than being chained with a pipe (which carries no data once stdout has been
## redirected, and hides failures of the earlier pipelines). pipefail makes a
## failure in any stage of a pipeline fail the task.
task ListOfDNMs {
  File DenovoGear_DNMs_file
  File DenovoGear_snp_file
  File DenovoGear_indel_file

  command {
    set -euo pipefail
    cut -f5,7 -d' ' ${DenovoGear_DNMs_file} | sed 's/ /|/g' | sort -u > DenovoGear_listof_DNMs_file.txt
    cut -f5,7 -d' ' ${DenovoGear_snp_file} | sed 's/ /|/g' | sort -u > DenovoGear_listof_snps_file.txt
    cut -f5,7 -d' ' ${DenovoGear_indel_file} | sed 's/ /|/g' | sort -u > DenovoGear_listof_indels_file.txt
  }

  # NOTE(review): Cromwell's Google backend documents the disks value as
  # "local-disk SIZE TYPE"; confirm the bare "local-disk" is accepted by the
  # backend this workflow targets.
  runtime {
    docker: "ubuntu:18.04"
    memory: "1GB"
    cpu: 1
    disks: "local-disk"
  }

  output {
    File DenovoGear_DNMs_file_output = "DenovoGear_listof_DNMs_file.txt"
    File DenovoGear_list_of_snps_output = "DenovoGear_listof_snps_file.txt"
    File DenovoGear_list_of_indels_output = "DenovoGear_listof_indels_file.txt"
  }
}