-
Notifications
You must be signed in to change notification settings - Fork 736
/
Copy pathrun_wgs_case_study_docker.sh
executable file
·105 lines (89 loc) · 3.73 KB
/
run_wgs_case_study_docker.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/bin/bash
# Copyright 2018 Google LLC.
set -euo pipefail
## Preliminaries
# Set a number of shell variables, to make what follows easier to read.
BASE="${HOME}/case-study"
BIN_VERSION="1.0.0"
INPUT_DIR="${BASE}/input/data"
REF="GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
BAM="HG002.novaseq.pcr-free.35x.dedup.grch38_no_alt.bam"
TRUTH_VCF="HG002_GRCh38_1_22_v4.2_benchmark.vcf.gz"
TRUTH_BED="HG002_GRCh38_1_22_v4.2_benchmark.bed"
N_SHARDS="64"
OUTPUT_DIR="${BASE}/output"
OUTPUT_VCF="HG002.output.vcf.gz"
OUTPUT_GVCF="HG002.output.g.vcf.gz"
LOG_DIR="${OUTPUT_DIR}/logs"
## Create local directory structure
mkdir -p "${OUTPUT_DIR}"
mkdir -p "${INPUT_DIR}"
mkdir -p "${LOG_DIR}"
## Download extra packages
# Install aria2 to download data files.
sudo apt-get -qq -y update
sudo apt-get -qq -y install aria2
if ! hash docker 2>/dev/null; then
echo "'docker' was not found in PATH. Installing docker..."
# Install docker using instructions on:
# https://docs.docker.com/install/linux/docker-ce/ubuntu/
sudo apt-get -qq -y install \
apt-transport-https \
ca-certificates \
curl \
gnupg-agent \
software-properties-common
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository \
"deb [arch=amd64] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) \
stable"
sudo apt-get -qq -y update
sudo apt-get -qq -y install docker-ce
fi
# Copy the data, using http:// because of https://github.com/aria2/aria2/issues/1012.
aria2c -c -x10 -s10 "http://storage.googleapis.com/deepvariant/case-study-testdata/${TRUTH_BED}" -d "${INPUT_DIR}"
aria2c -c -x10 -s10 "http://storage.googleapis.com/deepvariant/case-study-testdata/${TRUTH_VCF}" -d "${INPUT_DIR}"
aria2c -c -x10 -s10 "http://storage.googleapis.com/deepvariant/case-study-testdata/${TRUTH_VCF}.tbi" -d "${INPUT_DIR}"
aria2c -c -x10 -s10 "http://storage.googleapis.com/deepvariant/case-study-testdata/${BAM}" -d "${INPUT_DIR}"
aria2c -c -x10 -s10 "http://storage.googleapis.com/deepvariant/case-study-testdata/${BAM}.bai" -d "${INPUT_DIR}"
aria2c -c -x10 -s10 "http://storage.googleapis.com/deepvariant/case-study-testdata/${REF}.gz" -d "${INPUT_DIR}"
aria2c -c -x10 -s10 "http://storage.googleapis.com/deepvariant/case-study-testdata/${REF}.gz.fai" -d "${INPUT_DIR}"
aria2c -c -x10 -s10 "http://storage.googleapis.com/deepvariant/case-study-testdata/${REF}.gz.gzi" -d "${INPUT_DIR}"
aria2c -c -x10 -s10 "http://storage.googleapis.com/deepvariant/case-study-testdata/${REF}.gzi" -d "${INPUT_DIR}"
aria2c -c -x10 -s10 "http://storage.googleapis.com/deepvariant/case-study-testdata/${REF}.fai" -d "${INPUT_DIR}"
## Pull the docker image.
sudo docker pull google/deepvariant:"${BIN_VERSION}"
echo "Run DeepVariant..."
sudo docker run \
-v "${INPUT_DIR}":"/input" \
-v "${OUTPUT_DIR}:/output" \
google/deepvariant:"${BIN_VERSION}" \
/opt/deepvariant/bin/run_deepvariant \
--model_type=WGS \
--ref="/input/${REF}.gz" \
--reads="/input/${BAM}" \
--output_vcf=/output/${OUTPUT_VCF} \
--output_gvcf=/output/${OUTPUT_GVCF} \
--num_shards=${N_SHARDS}
echo "Done."
echo
## Evaluation: run hap.py
echo "Start evaluation with hap.py..."
UNCOMPRESSED_REF="${INPUT_DIR}/${REF}"
# hap.py cannot read the compressed fa, so uncompress
# into a writable directory. Index file was downloaded earlier.
zcat <"${INPUT_DIR}/${REF}.gz" >"${UNCOMPRESSED_REF}"
sudo docker pull pkrusche/hap.py
( sudo docker run -i \
-v "${INPUT_DIR}:${INPUT_DIR}" \
-v "${OUTPUT_DIR}:${OUTPUT_DIR}" \
pkrusche/hap.py /opt/hap.py/bin/hap.py \
"${INPUT_DIR}/${TRUTH_VCF}" \
"${OUTPUT_DIR}/${OUTPUT_VCF}" \
-f "${INPUT_DIR}/${TRUTH_BED}" \
-r "${UNCOMPRESSED_REF}" \
-o "${OUTPUT_DIR}/happy.output" \
--engine=vcfeval
) 2>&1 | tee "${LOG_DIR}/happy.log"
echo "Done."