-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathpep_alignments.nf
executable file
·128 lines (114 loc) · 3.79 KB
/
pep_alignments.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env nextflow
import java.math.RoundingMode;
import java.math.BigDecimal;
//
// MODULE IMPORT BLOCK
//
include { CAT_CAT } from '../../modules/nf-core/cat/cat/main'
include { BEDTOOLS_SORT } from '../../modules/nf-core/bedtools/sort/main'
include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main'
include { MINIPROT_INDEX } from '../../modules/nf-core/miniprot/index/main'
include { MINIPROT_ALIGN } from '../../modules/nf-core/miniprot/align/main'
include { EXTRACT_COV_IDEN } from '../../modules/local/extract_cov_iden'
workflow PEP_ALIGNMENTS {
    take:
    reference_tuple     // Channel: tuple [ val(meta), path(file) ]
    pep_files           // Channel: tuple [ val(meta), path(file) ]

    main:
    // Collects the version reports of every module invoked below.
    ch_versions = Channel.empty()

    //
    // MODULE: BUILD THE MINIPROT INDEX FROM THE REFERENCE
    //
    MINIPROT_INDEX ( reference_tuple )
    ch_versions = ch_versions.mix( MINIPROT_INDEX.out.versions )

    //
    // LOGIC: RE-CHUNK THE INCOMING PEP CHANNEL INTO ( META, FILE ) PAIRS,
    //        ATTACH THE MINIPROT INDEX TO EACH PAIR, THEN SPLIT INTO TWO
    //        PARALLEL STREAMS: ONE FOR THE PEP DATA, ONE FOR THE REFERENCE INDEX
    //
    formatted_input = pep_files
        .flatten()
        .buffer( size: 2 )
        .combine( MINIPROT_INDEX.out.index )
        .multiMap { query_meta, query_file, index_meta, index_path ->
            pep_tuple  : tuple( [ id: query_meta.id, type: query_meta.type, org: query_meta.org ], query_file )
            index_file : tuple( [ id: "Reference" ], index_path )
        }

    //
    // MODULE: ALIGN EACH PEP FILE AGAINST THE REFERENCE INDEX
    //         EMITS ONE GFF PER PEP FILE
    //
    MINIPROT_ALIGN ( formatted_input.pep_tuple, formatted_input.index_file )
    ch_versions = ch_versions.mix( MINIPROT_ALIGN.out.versions )

    //
    // LOGIC: RE-KEY EACH GFF BY QUERY ORGANISM + DATA TYPE AND COLLECT
    //        ALL GFFS SHARING THAT KEY INTO ONE TUPLE
    //
    grouped_tuple = MINIPROT_ALIGN.out.gff
        .map { meta, gff ->
            def group_meta = [ id: meta.org + '_pep', type: meta.type ]
            tuple( group_meta, gff )
        }
        .groupTuple( by: [0] )

    //
    // MODULE: MERGE THE GROUPED GFFS INTO ONE FILE PER ORGANISM + TYPE
    //
    CAT_CAT ( grouped_tuple )
    ch_versions = ch_versions.mix( CAT_CAT.out.versions )

    //
    // LOGIC: RECORD THE MERGED FILE'S LINE COUNT IN THE META MAP
    //        FOR BETTER RESOURCE USAGE DOWNSTREAM
    //
    bedtools_input = CAT_CAT.out.file_out
        .map { meta, merged_gff ->
            def counted_meta = [ id: meta.id, lines: merged_gff.countLines() ]
            tuple( counted_meta, merged_gff )
        }

    //
    // MODULE: SORT THE MERGED FILE, RETAINING THE GFF SUFFIX
    //         SECOND INPUT IS INTENTIONALLY LEFT EMPTY
    //
    BEDTOOLS_SORT ( bedtools_input, [] )
    ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions )

    //
    // MODULE: CUT THE MERGED (UNSORTED) GFF INTO A PUNCHLIST
    //
    EXTRACT_COV_IDEN ( CAT_CAT.out.file_out )
    ch_versions = ch_versions.mix( EXTRACT_COV_IDEN.out.versions )

    //
    // MODULE: COMPRESS AND INDEX THE SORTED GFF
    //         EMITS THE GZ + TBI PAIR
    //
    TABIX_BGZIPTABIX ( BEDTOOLS_SORT.out.sorted )
    ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions )

    emit:
    gff_file    = BEDTOOLS_SORT.out.sorted
    tbi_gff     = TABIX_BGZIPTABIX.out.gz_tbi
    pep_punch   = EXTRACT_COV_IDEN.out.punchlist
    versions    = ch_versions.ifEmpty(null)
}