Skip to content

Commit

Permalink
Add output index type to samtools view
Browse files Browse the repository at this point in the history
  • Loading branch information
fellen31 committed Feb 14, 2025
1 parent 27978ac commit bb3ec89
Show file tree
Hide file tree
Showing 4 changed files with 867 additions and 145 deletions.
36 changes: 31 additions & 5 deletions modules/nf-core/samtools/view/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ process SAMTOOLS_VIEW {
tuple val(meta), path(input), path(index)
tuple val(meta2), path(fasta)
path qname
val index_format

output:
tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true
Expand All @@ -20,7 +21,7 @@ process SAMTOOLS_VIEW {
tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true
tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true
tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true
tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crsi}"), emit: unselected_index, optional: true
tuple val(meta), path("${prefix}.unselected.${file_type}.{csi,crai}"), emit: unselected_index, optional: true
path "versions.yml", emit: versions

when:
Expand All @@ -35,16 +36,27 @@ process SAMTOOLS_VIEW {
args.contains("--output-fmt bam") ? "bam" :
args.contains("--output-fmt cram") ? "cram" :
input.getExtension()

output_file = index_format ? "${prefix}.${file_type}##idx##${prefix}.${file_type}.${index_format}" : "${prefix}.${file_type}"
// Can't choose index type of unselected file
readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": ""

if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
if (index_format) {
if (!index_format.matches('bai|csi|crai')) {
error "Index format not one of bai, csi, crai."
} else if (file_type == "sam") {
error "Indexing not compatible with SAM output"
}
}
"""
samtools \\
view \\
--threads ${task.cpus-1} \\
${reference} \\
${readnames} \\
$args \\
-o ${prefix}.${file_type} \\
-o ${output_file} \\
$input \\
$args2
Expand All @@ -61,13 +73,27 @@ process SAMTOOLS_VIEW {
args.contains("--output-fmt bam") ? "bam" :
args.contains("--output-fmt cram") ? "cram" :
input.getExtension()
if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"

index = args.contains("--write-index") ? "touch ${prefix}.${file_type}.csi" : ""
default_index_format =
file_type == "bam" ? "csi" :
file_type == "cram" ? "crai" : ""
index = args.contains("--write-index") && index_format ? "touch ${prefix}.${file_type}.${index_format}" : args.contains("--write-index") ? "touch ${prefix}.${file_type}.${default_index_format}" : ""
unselected = qname ? "touch ${prefix}.unselected.${file_type}" : ""
// Can't choose index type of unselected file
unselected_index = qname && args.contains("--write-index") ? "touch ${prefix}.unselected.${file_type}.${default_index_format}" : ""

if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
if (index_format) {
if (!index_format.matches('bai|csi|crai')) {
error "Index format not one of bai, csi, crai."
} else if (file_type == "sam") {
error "Indexing not compatible with SAM output."
}
}
"""
touch ${prefix}.${file_type}
${index}
${unselected}
${unselected_index}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
3 changes: 3 additions & 0 deletions modules/nf-core/samtools/view/tests/cram_index.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Test configuration for the CRAM index tests: every process run under this
// config receives these samtools view arguments via ext.args, i.e. CRAM output
// is requested together with --write-index.
process {
ext.args = "--output-fmt cram --write-index"
}
249 changes: 249 additions & 0 deletions modules/nf-core/samtools/view/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ nextflow_process {
])
input[1] = [[],[]]
input[2] = []
input[3] = []
"""
}
}
Expand All @@ -39,6 +40,135 @@ nextflow_process {
}
}

// Real (non-stub) run with the fourth process input (index_format) set to 'csi'.
// Inputs, in order: [meta, sorted paired-end BAM, no input index], an empty
// reference tuple, no qname file, and the requested index format.
test("bam_csi_index") {

// NOTE(review): bam_index.config is not visible here; presumably it sets
// ext.args so that BAM output is written with --write-index — confirm.
config "./bam_index.config"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
[]
])
input[1] = [[],[]]
input[2] = []
input[3] = 'csi'
"""
}
}

then {
assertAll(
{ assert process.success },
// Only the file names of the bam and csi outputs are snapshotted (not their
// contents), together with the versions output.
{ assert snapshot(
file(process.out.bam[0][1]).name,
file(process.out.csi[0][1]).name,
process.out.versions).match()
}
)
}
}

// Real (non-stub) run with the fourth process input (index_format) set to 'bai'.
// Inputs, in order: [meta, sorted paired-end BAM, no input index], an empty
// reference tuple, no qname file, and the requested index format.
test("bam_bai_index") {

// NOTE(review): bam_index.config is not visible here; presumably it sets
// ext.args so that BAM output is written with --write-index — confirm.
config "./bam_index.config"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
[]
])
input[1] = [[],[]]
input[2] = []
input[3] = 'bai'
"""
}
}

then {
assertAll(
{ assert process.success },
// Only the file names of the bam and bai outputs are snapshotted (not their
// contents), together with the versions output.
{ assert snapshot(
file(process.out.bam[0][1]).name,
file(process.out.bai[0][1]).name,
process.out.versions).match() }
)
}
}

// Real (non-stub) run combining index_format 'bai' with a qname file, so the
// process is also expected to emit the "unselected" reads file and its index.
// The qname file is built on the fly: a single read name ('testN:1') collected
// into selected_reads.txt and passed as the third input.
test("bam_bai_index_unselected") {

// NOTE(review): bam_index.config is not visible here; presumably it sets
// ext.args so that BAM output is written with --write-index — confirm.
config "./bam_index.config"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
[]
])
input[1] = [[],[]]
input[2] = Channel.of('testN:1')
.collectFile(name: 'selected_reads.txt')
input[3] = 'bai'
"""
}
}

then {
assertAll(
{ assert process.success },
// File names only are snapshotted. The requested 'bai' format applies to the
// main output; the module's unselected_index output only matches csi/crai,
// so the unselected file's index is not expected to be a .bai.
{ assert snapshot(
file(process.out.bam[0][1]).name,
file(process.out.bai[0][1]).name,
file(process.out.unselected[0][1]).name,
file(process.out.unselected_index[0][1]).name,
process.out.versions).match()
}
)
}
}

// Real (non-stub) run producing CRAM output with a 'crai' index plus the
// "unselected" reads file and its index. The input is the sorted paired-end BAM;
// CRAM output and --write-index come from cram_index.config. The qname file is
// built on the fly from a single read name ('testN:1').
test("cram_crai_index_unselected") {

config "./cram_index.config"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
[]
])
input[1] = [[],[]]
input[2] = Channel.of('testN:1')
.collectFile(name: 'selected_reads.txt')
input[3] = 'crai'
"""
}
}

then {
assertAll(
{ assert process.success },
// Only file names are snapshotted (not file contents), together with the
// versions output.
{ assert snapshot(
file(process.out.cram[0][1]).name,
file(process.out.crai[0][1]).name,
file(process.out.unselected[0][1]).name,
file(process.out.unselected_index[0][1]).name,
process.out.versions).match()
}
)
}
}

test("cram") {

when {
Expand All @@ -54,6 +184,7 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
])
input[2] = []
input[3] = []
"""
}
}
Expand Down Expand Up @@ -89,6 +220,7 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
])
input[2] = []
input[3] = []
"""
}
}
Expand Down Expand Up @@ -124,6 +256,7 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
])
input[2] = []
input[3] = []
"""
}
}
Expand Down Expand Up @@ -159,6 +292,7 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
])
input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true)
input[3] = []
"""
}
}
Expand Down Expand Up @@ -194,6 +328,7 @@ nextflow_process {
])
input[1] = [[],[]]
input[2] = []
input[3] = []
"""
}
}
Expand All @@ -211,4 +346,118 @@ nextflow_process {
)
}
}

// Stub-mode counterpart of "bam_csi_index": same inputs (index_format 'csi'),
// but run with -stub so only the process's stub section is executed.
test("bam_csi_index - stub") {

options "-stub"
// NOTE(review): bam_index.config is not visible here; presumably it sets
// ext.args so that BAM output is written with --write-index — confirm.
config "./bam_index.config"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
[]
])
input[1] = [[],[]]
input[2] = []
input[3] = 'csi'
"""
}
}

then {
assertAll(
{ assert process.success },
// The complete set of output channels is snapshotted in stub mode.
{ assert snapshot(process.out).match() }
)
}
}

// Stub-mode counterpart of "bam_bai_index": same inputs (index_format 'bai'),
// but run with -stub so only the process's stub section is executed.
test("bam_bai_index - stub") {

options "-stub"
// NOTE(review): bam_index.config is not visible here; presumably it sets
// ext.args so that BAM output is written with --write-index — confirm.
config "./bam_index.config"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
[]
])
input[1] = [[],[]]
input[2] = []
input[3] = 'bai'
"""
}
}

then {
assertAll(
{ assert process.success },
// The complete set of output channels is snapshotted in stub mode.
{ assert snapshot(process.out).match() }
)
}
}

// Stub-mode counterpart of "bam_bai_index_unselected": index_format 'bai' plus a
// qname file built from a single read name, run with -stub.
// NOTE(review): the test name reads "uselected" (probably meant "unselected").
// The name is also the key used in the snapshot file, so renaming it requires
// regenerating or updating the snapshot — confirm before changing.
test("bam_bai_index_uselected - stub") {

options "-stub"
// NOTE(review): bam_index.config is not visible here; presumably it sets
// ext.args so that BAM output is written with --write-index — confirm.
config "./bam_index.config"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
[]
])
input[1] = [[],[]]
input[2] = Channel.of('testN:1')
.collectFile(name: 'selected_reads.txt')
input[3] = 'bai'
"""
}
}

then {
assertAll(
{ assert process.success },
// The complete set of output channels is snapshotted in stub mode.
{ assert snapshot(process.out).match() }
)
}
}

// Stub-mode counterpart of "cram_crai_index_unselected": CRAM output with
// --write-index (from cram_index.config), index_format 'crai', and a qname file
// built from a single read name, run with -stub.
test("cram_crai_index_unselected - stub") {

options "-stub"
config "./cram_index.config"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
[]
])
input[1] = [[],[]]
input[2] = Channel.of('testN:1')
.collectFile(name: 'selected_reads.txt')
input[3] = 'crai'
"""
}
}

then {
assertAll(
{ assert process.success },
// The complete set of output channels is snapshotted in stub mode.
{ assert snapshot(process.out).match() }
)
}
}
}
Loading

0 comments on commit bb3ec89

Please sign in to comment.