Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bowtie2: remove code duplication #1645

Merged
merged 13 commits into from
May 11, 2022
109 changes: 50 additions & 59 deletions modules/bowtie2/align/main.nf
Original file line number Diff line number Diff line change
@@ -1,81 +1,72 @@
process BOWTIE2_ALIGN {
tag "$meta.id"
label 'process_high'
label "process_high"

conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0' :
'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0' }"
conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6" : null)
container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ?
"https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" :
"quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" }"

input:
tuple val(meta), path(reads)
path index
val save_unaligned

output:
tuple val(meta), path('*.bam') , emit: bam
tuple val(meta), path('*.log') , emit: log
tuple val(meta), path('*fastq.gz'), emit: fastq, optional:true
tuple val(meta), path("*.bam") , emit: bam
tuple val(meta), path("*.log") , emit: log
tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def args = task.ext.args ?: ""
def args2 = task.ext.args2 ?: ""
def prefix = task.ext.prefix ?: "${meta.id}"
if (meta.single_end) {
def unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
"""
INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'`
[ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed 's/.rev.1.bt2l//'`
[ -z "\$INDEX" ] && echo "BT2 index files not found" 1>&2 && exit 1
bowtie2 \\
-x \$INDEX \\
-U $reads \\
--threads $task.cpus \\
$unaligned \\
$args \\
2> ${prefix}.bowtie2.log \\
| samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam -

cat <<-END_VERSIONS > versions.yml
"${task.process}":
bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//')
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
END_VERSIONS
"""
def unaligned = ""
def reads_args = ""
if (meta.single_end) {
unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ""
reads_args = "-U ${reads}"
} else {
def unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
"""
INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'`
[ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed 's/.rev.1.bt2l//'`
[ -z "\$INDEX" ] && echo "BT2 index files not found" 1>&2 && exit 1
bowtie2 \\
-x \$INDEX \\
-1 ${reads[0]} \\
-2 ${reads[1]} \\
--threads $task.cpus \\
$unaligned \\
$args \\
2> ${prefix}.bowtie2.log \\
| samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam -
unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ""
reads_args = "-1 ${reads[0]} -2 ${reads[1]}"
}

if [ -f ${prefix}.unmapped.fastq.1.gz ]; then
mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz
fi
if [ -f ${prefix}.unmapped.fastq.2.gz ]; then
mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz
fi
def samtools_command = "samtools view -@ $task.cpus --bam --with-header ${args2} > ${prefix}.bam"

cat <<-END_VERSIONS > versions.yml
"${task.process}":
bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//')
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
END_VERSIONS
"""
}

"""
INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"`
[ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/.rev.1.bt2l//"`
[ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1

bowtie2 \\
-x \$INDEX \\
$reads_args \\
--threads $task.cpus \\
$unaligned \\
$args \\
2> ${prefix}.bowtie2.log \\
| $samtools_command

if [ -f ${prefix}.unmapped.fastq.1.gz ]; then
mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz
fi

if [ -f ${prefix}.unmapped.fastq.2.gz ]; then
mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz
fi

cat <<-END_VERSIONS > versions.yml
"${task.process}":
bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//')
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
END_VERSIONS
"""
}

84 changes: 11 additions & 73 deletions tests/modules/bowtie2/align/test.yml
Original file line number Diff line number Diff line change
@@ -1,83 +1,21 @@
- name: bowtie2 align single-end
- name: bowtie2 align test_bowtie2_align_single_end
command: nextflow run ./tests/modules/bowtie2/align -entry test_bowtie2_align_single_end -c ./tests/config/nextflow.config -c ./tests/modules/bowtie2/align/nextflow.config
tags:
- bowtie2
- bowtie2/align
files:
- path: ./output/bowtie2/test.bam
- path: ./output/bowtie2/test.bowtie2.log
- path: ./output/bowtie2/bowtie2/genome.3.bt2
md5sum: 4ed93abba181d8dfab2e303e33114777
- path: ./output/bowtie2/bowtie2/genome.2.bt2
md5sum: 47b153cd1319abc88dda532462651fcf
- path: ./output/bowtie2/bowtie2/genome.1.bt2
md5sum: cbe3d0bbea55bc57c99b4bfa25b5fbdf
- path: ./output/bowtie2/bowtie2/genome.4.bt2
md5sum: c25be5f8b0378abf7a58c8a880b87626
- path: ./output/bowtie2/bowtie2/genome.rev.1.bt2
md5sum: 52be6950579598a990570fbcf5372184
- path: ./output/bowtie2/bowtie2/genome.rev.2.bt2
md5sum: e3b4ef343dea4dd571642010a7d09597

- name: bowtie2 align paired-end
command: nextflow run ./tests/modules/bowtie2/align -entry test_bowtie2_align_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/bowtie2/align/nextflow.config
tags:
- bowtie2
- bowtie2/align
files:
- path: ./output/bowtie2/test.bam
- path: ./output/bowtie2/test.bowtie2.log
- path: ./output/bowtie2/bowtie2/genome.3.bt2
md5sum: 4ed93abba181d8dfab2e303e33114777
- path: ./output/bowtie2/bowtie2/genome.2.bt2
md5sum: 47b153cd1319abc88dda532462651fcf
- path: ./output/bowtie2/bowtie2/genome.1.bt2
md5sum: cbe3d0bbea55bc57c99b4bfa25b5fbdf
- path: ./output/bowtie2/bowtie2/genome.4.bt2
md5sum: c25be5f8b0378abf7a58c8a880b87626
- path: ./output/bowtie2/bowtie2/genome.rev.1.bt2
md5sum: 52be6950579598a990570fbcf5372184
- path: ./output/bowtie2/bowtie2/genome.rev.2.bt2
md5sum: e3b4ef343dea4dd571642010a7d09597
Comment on lines -30 to -41
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have these all been deleted because the BAM won't be present without the index files? Is there any harm in leaving them in as a sanity check?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file was auto-generated by create-test-yml, so I have no vested interest in its content.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also have no idea why the index files were ever included in the output from bowtie2 align.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, no idea what happened here 😏 Does /output/bowtie2/bowtie2/ exist when you run pytest?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.
├── 40
│   └── fde8dfbf23b62799ec1c8fe1edc520
│       ├── bowtie2 -> /tmp/pytest_workflow_um_6gy_g/bowtie2_align_test_bowtie2_align_single_end/work/43/d87f151078c05ee619a09f9adaae31/bowtie2
│       ├── test.bam
│       ├── test.bowtie2.log
│       ├── test_1.fastq.gz -> /tmp/pytest_workflow_um_6gy_g/bowtie2_align_test_bowtie2_align_single_end/work/stage/07/142d3883569478136b1e5e6536ffbd/test_1.fastq.gz
│       └── versions.yml
├── 43
│   └── d87f151078c05ee619a09f9adaae31
│       ├── bowtie2
│       │   ├── genome.1.bt2
│       │   ├── genome.2.bt2
│       │   ├── genome.3.bt2
│       │   ├── genome.4.bt2
│       │   ├── genome.rev.1.bt2
│       │   └── genome.rev.2.bt2
│       ├── genome.fasta -> /tmp/pytest_workflow_um_6gy_g/bowtie2_align_test_bowtie2_align_single_end/work/stage/7a/1e46d5cd1dad747bdec96651090752/genome.fasta
│       └── versions.yml
└── stage
    ├── 07
    │   └── 142d3883569478136b1e5e6536ffbd
    │       └── test_1.fastq.gz
    └── 7a
        └── 1e46d5cd1dad747bdec96651090752
            └── genome.fasta

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, in the output directory not the work directory.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

output/
└── bowtie2
    ├── bowtie2 -> /tmp/pytest_workflow_82vq1t4y/bowtie2_align_test_bowtie2_align_single_end/work/8b/b0d4b9040c8329f51428d5e92b6812/bowtie2
    │   ├── genome.1.bt2
    │   ├── genome.2.bt2
    │   ├── genome.3.bt2
    │   ├── genome.4.bt2
    │   ├── genome.rev.1.bt2
    │   └── genome.rev.2.bt2
    ├── test.bam -> /tmp/pytest_workflow_82vq1t4y/bowtie2_align_test_bowtie2_align_single_end/work/e7/61e8210898ae529cbed8912da0512a/test.bam
    ├── test.bowtie2.log -> /tmp/pytest_workflow_82vq1t4y/bowtie2_align_test_bowtie2_align_single_end/work/e7/61e8210898ae529cbed8912da0512a/test.bowtie2.log
    └── versions.yml -> /tmp/pytest_workflow_82vq1t4y/bowtie2_align_test_bowtie2_align_single_end/work/e7/61e8210898ae529cbed8912da0512a/versions.yml

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. So the indices are published, so why are they being removed from the test.yml? 🤔 This scares me a little, in case there are instances where create-test-yml is missing files it shouldn't. Worth checking again before the next release.

Would you mind creating another issue please with some commands to reproduce?

Thanks!

- path: output/bowtie2/test.bam
- path: output/bowtie2/test.bowtie2.log
md5sum: 7b8a9e61b7646da1089b041333c41a87
- path: output/bowtie2/versions.yml

- name: bowtie2 align single-end large-index
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another one for you @chris-cheshire. You added these entries to test.yml for large indices, but we don't have the corresponding entries in main.nf to actually test them. This is why all of these lines have automatically been deleted by create-test-yml.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, do we need to though? They use the same entry point; they just change a parameter.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, the workflow in main.nf needs to be explicitly duplicated for each test case. Maybe you can add these in a follow-up PR after this is merged? You can ping me to review.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, my bad, will do.

command: nextflow run ./tests/modules/bowtie2/align -entry test_bowtie2_align_single_end -c ./tests/config/nextflow.config -c ./tests/modules/bowtie2/align/nextflow.config --force_large_index
- name: bowtie2 align test_bowtie2_align_paired_end
command: nextflow run ./tests/modules/bowtie2/align -entry test_bowtie2_align_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/bowtie2/align/nextflow.config
tags:
- bowtie2
- bowtie2/align
files:
- path: ./output/bowtie2/test.bam
- path: ./output/bowtie2/test.bowtie2.log
- path: ./output/bowtie2/bowtie2/genome.3.bt2l
md5sum: 8952b3e0b1ce9a7a5916f2e147180853
- path: ./output/bowtie2/bowtie2/genome.2.bt2l
md5sum: 22c284084784a0720989595e0c9461fd
- path: ./output/bowtie2/bowtie2/genome.1.bt2l
md5sum: 07d811cd4e350d56267183d2ac7023a5
- path: ./output/bowtie2/bowtie2/genome.4.bt2l
md5sum: c25be5f8b0378abf7a58c8a880b87626
- path: ./output/bowtie2/bowtie2/genome.rev.1.bt2l
md5sum: fda48e35925fb24d1c0785f021981e25
- path: ./output/bowtie2/bowtie2/genome.rev.2.bt2l
md5sum: 802c26d32b970e1b105032b7ce7348b4

- name: bowtie2 align paired-end large-index
command: nextflow run ./tests/modules/bowtie2/align -entry test_bowtie2_align_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/bowtie2/align/nextflow.config --force_large_index
tags:
- bowtie2
- bowtie2/align
files:
- path: ./output/bowtie2/test.bam
- path: ./output/bowtie2/test.bowtie2.log
- path: ./output/bowtie2/bowtie2/genome.3.bt2l
md5sum: 8952b3e0b1ce9a7a5916f2e147180853
- path: ./output/bowtie2/bowtie2/genome.2.bt2l
md5sum: 22c284084784a0720989595e0c9461fd
- path: ./output/bowtie2/bowtie2/genome.1.bt2l
md5sum: 07d811cd4e350d56267183d2ac7023a5
- path: ./output/bowtie2/bowtie2/genome.4.bt2l
md5sum: c25be5f8b0378abf7a58c8a880b87626
- path: ./output/bowtie2/bowtie2/genome.rev.1.bt2l
md5sum: fda48e35925fb24d1c0785f021981e25
- path: ./output/bowtie2/bowtie2/genome.rev.2.bt2l
md5sum: 802c26d32b970e1b105032b7ce7348b4
- path: output/bowtie2/test.bam
- path: output/bowtie2/test.bowtie2.log
md5sum: bd89ce1b28c93bf822bae391ffcedd19
- path: output/bowtie2/versions.yml