-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpipeline.bash
76 lines (59 loc) · 2.7 KB
/
pipeline.bash
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/bin/bash
##############################################################################
help_file="This is a free application used for quantifying DNA methylation level for RRBS data\n\nUSAGE: bash pipeline.bash -w/--workdir <work directory for sample> -b/--bam_file <bam file position> -s/--sample <sample name>\n\nRequired Arguments:\n\n-w/--workdir\tThe work directory for sample\n\n-b/--bam_file\tThe bam file position for sample\n\n-s/--sample\tThe sample name for analysis.\n\nOther:\n\n-h/--help\tDisplay the help file"
TEMP=`getopt -o w:s:b:h --long workdir:,sample:,bam:,help -- "$@"`
eval set -- "$TEMP"
while true ; do
case "$1" in
-w|--workdir)
wkd=$2 ; shift 2;;
-b|--bam)
bam=$2; shift 2;;
-s|--sample)
sample=$2;shift 2;;
-h|--help)
echo -e ${help_file}; exit 1 ;;
--) shift ; break ;;
*) echo "Internal error!" ; exit 1 ;;
esac
done
##############################################
# wkd=/data/sixone/lllab/RRBS/downstream_analysis
# bam=${wkd}/Wemics_example/example.bam
# sample=example
# # Work directory
# cd ${wkd}/Wemics_example
# Process the bam file from bismark(bam file was sorted by name)
# Separate the information of read1 and read2 from bam file
cd ${wkd}
samtools view -@10 ${sample}.bam|\
awk -v read1=${sample}_read1.out -v read2=${sample}_read2.out '{if(NR%2==1) print $0 >>read1;else print $0 >>read2}'
for read in read1 read2
do
cat ${sample}_${read}.out|awk 'BEGIN{FS=OFS="\t"} {gsub("XM:Z:","",$14);if($9>0) $9="+";else $9="-"; print $3,$4,$9,$6,$14}'> ${sample}_${read}.bed &
done
wait
paste -d "\t" ${sample}_read1.bed ${sample}_read2.bed|grep ^chr[1-9]|sort -k1,1V -k2,2n|awk 'BEGIN{FS=OFS="\t"} {print $1,$2,$3,$4,$5,$7,$8,$9,$10}'> ${sample}_reads12.bed
# Using chr21 as an example
contig=chr21
cat ${sample}_reads12.bed |grep ${contig} > ${sample}_${contig}.bed
# Process reads
python3 ${wkd}/Wemics_example/bin/process_reads.py \
${sample}_${contig}.bed \
${contig} \
${sample}_${contig}_processed.bed
# Sort reads
cat ${sample}_${contig}_processed.bed|sort -k1,1V -k2,2n -k3,3n > ${sample}_readmeth.bed
# Wemics qutification
python3 ${wkd}/Wemics_example/bin/Wemics.py \
${sample}_readmeth.bed \
${contig} \
${sample}_${contig}_Wemics.bed
# Merge all contig file into one
# cat $(for contig in chr{1..22};do echo "${sample}_${contig}_Wemics.bed ";done) \
# |awk 'BEGIN{FS=OFS="\t"} {print $1,$2,$3,$5,$4,$7}' > ${sample}_Wemics.bed
# Because we only have chr21 in our bam file, we could just use command below
cat ${sample}_${contig}_Wemics.bed|awk 'BEGIN{FS=OFS="\t"} {print $1,$2,$3,$5,$4,$7}' > ${sample}_Wemics.bed
# Remove temp file
# rm ${sample}_${contig}_Wemics.bed \
# ${sample}_read*.out ${sample}_read*.bed