From a5265513001fdbcea14388670fdfabe5ae2367f6 Mon Sep 17 00:00:00 2001
From: FelixKrueger
Date: Thu, 19 Mar 2020 18:13:15 +0000
Subject: [PATCH 1/4] Added Bowtie2 module and test workflow

---
Review notes (text in this section is not part of the commit message):
- meta.yml: the tool entry was keyed `fastqc` (copy-paste); it is now `bowtie2`.
  The glob patterns and the author handle are quoted because a plain YAML
  scalar may not start with `*` (alias indicator) or `@` (reserved indicator).
- test/main.nf: the Bowtie 2 index path is resolved from ${baseDir} instead of
  a personal home directory, so the test can run from any checkout.
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this series. Blob ids on the `index` lines of the corrected files
  (meta.yml, test/main.nf) predate these corrections.

 tools/bowtie2/main.nf              | 55 ++++++++++++++++++++++++++++++
 tools/bowtie2/meta.yml             | 37 ++++++++++++++++++++
 tools/bowtie2/test/main.nf         | 31 +++++++++++++++++
 tools/bowtie2/test/nextflow.config |  2 ++
 4 files changed, 125 insertions(+)
 create mode 100644 tools/bowtie2/main.nf
 create mode 100644 tools/bowtie2/meta.yml
 create mode 100755 tools/bowtie2/test/main.nf
 create mode 100644 tools/bowtie2/test/nextflow.config

diff --git a/tools/bowtie2/main.nf b/tools/bowtie2/main.nf
new file mode 100644
index 00000000000..e5ebf47fc54
--- /dev/null
+++ b/tools/bowtie2/main.nf
@@ -0,0 +1,55 @@
+nextflow.preview.dsl=2
+params.genome = ''
+
+process BOWTIE2 {
+    // depending on the genome used one might want/need to adjust the memory settings.
+    // For the E. coli test data this is probably not required
+
+    // label 'bigMem'
+    // label 'multiCore'
+
+    input:
+        tuple val(name), path(reads)
+        val (outdir)
+        val (bowtie2_args)
+        val (verbose)
+
+    output:
+        path "*bam", emit: bam
+        path "*stats.txt", emit: stats
+
+    publishDir "$outdir/bowtie2",
+        mode: "copy", overwrite: true
+
+    script:
+        if (verbose){
+            println ("[MODULE] BOWTIE2 ARGS: " + bowtie2_args)
+        }
+
+        cores = 4
+
+        readString = ""
+
+        // Options we add are
+        bowtie2_options = bowtie2_args
+        bowtie2_options += " --no-unal " // We don't need unaligned reads in the BAM file
+
+        // single-end / paired-end distinction. Might also be handled via params.single_end
+        if (reads instanceof List) {
+            readString = "-1 " + reads[0] + " -2 " + reads[1]
+        }
+        else {
+            readString = "-U " + reads
+        }
+
+        index = params.genome["bowtie2"]
+        bowtie2_name = name + "_" + params.genome["name"]
+
+        println ("bowtie2 -x ${index} -p ${cores} ${bowtie2_options} ${readString} 2>${bowtie2_name}_bowtie2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${bowtie2_name}_bowtie2.bam")
+        """
+        module load bowtie2
+        module load samtools
+        bowtie2 -x ${index} -p ${cores} ${bowtie2_options} ${readString} 2>${bowtie2_name}_bowtie2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${bowtie2_name}_bowtie2.bam
+        """
+
+}
diff --git a/tools/bowtie2/meta.yml b/tools/bowtie2/meta.yml
new file mode 100644
index 00000000000..b3b31a947ab
--- /dev/null
+++ b/tools/bowtie2/meta.yml
@@ -0,0 +1,37 @@
+name: Bowtie 2
+description: Ultrafast alignment to reference genome
+keywords:
+    - Alignment
+    - Short reads
+    - FM Index
+tools:
+    - bowtie2:
+        description: |
+            Bowtie 2 is an ultrafast and memory-efficient tool for aligning sequencing reads
+            to long reference sequences. It is particularly good at aligning reads of about
+            50 up to 100s or 1,000s of characters, and particularly good at aligning to relatively
+            long (e.g. mammalian) genomes. Bowtie 2 indexes the genome with an FM Index to keep
+            its memory footprint small: for the human genome, its memory footprint is typically
+            around 3.2 GB. Bowtie 2 supports gapped, local, and paired-end alignment modes.
+        homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
+        documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
+input:
+    -
+        - sample_id:
+            type: string
+            description: Sample identifier
+        - reads:
+            type: file
+            description: Input FastQ file, or pair of files
+output:
+    -
+        - report:
+            type: file
+            description: mapping statistics report
+            pattern: "*bowtie2_stats.txt"
+        - alignment:
+            type: file
+            description: alignment file in BAM format
+            pattern: "*bowtie2.bam"
+authors:
+    - "@FelixKrueger"
diff --git a/tools/bowtie2/test/main.nf b/tools/bowtie2/test/main.nf
new file mode 100755
index 00000000000..5c02914b839
--- /dev/null
+++ b/tools/bowtie2/test/main.nf
@@ -0,0 +1,31 @@
+#!/usr/bin/env nextflow
+nextflow.preview.dsl=2
+
+params.outdir = "."
+params.genome = ""
+params.bowtie2_args = ''
+// Bowtie2 arguments should be supplied in the following format to work:
+// --bowtie2_args="--score-min L,0,-0.8"
+
+params.verbose = false
+
+if (params.verbose){
+    println ("[WORKFLOW] BOWTIE2 ARGS: " + params.bowtie2_args)
+}
+
+// for other genomes this needs to be handled somehow to return all possible genomes
+genomeValues = ["name" : params.genome]
+genomeValues["bowtie2"] = "${baseDir}/../../../test-datasets/indices/bowtie2/E_coli/${params.genome}";
+
+include '../main.nf' params(genome: genomeValues)
+
+ch_read_files = Channel
+    .fromFilePairs('../../../test-datasets/Ecoli*{1,2}.fastq.gz',size:-1)
+    // .view() // to check whether the input channel works
+
+workflow {
+
+    main:
+        BOWTIE2(ch_read_files, params.outdir, params.bowtie2_args, params.verbose)
+
+}
\ No newline at end of file
diff --git a/tools/bowtie2/test/nextflow.config b/tools/bowtie2/test/nextflow.config
new file mode 100644
index 00000000000..c137a138220
--- /dev/null
+++ b/tools/bowtie2/test/nextflow.config
@@ -0,0 +1,2 @@
+docker.enabled = true
+params.outdir = './results'

From a2104ca3b9307439d694bd339b8d12d645ba7b3a Mon Sep 17 00:00:00 2001
From: FelixKrueger
Date: Fri, 20 Mar 2020 11:04:25 +0000
Subject: [PATCH 2/4] Added HISAT2 module and test workflow

---
Review notes (text in this section is not part of the commit message):
- meta.yml: the tool entry was keyed `fastqc` (copy-paste); it is now `hisat2`.
  The glob patterns and the author handle are quoted because a plain YAML
  scalar may not start with `*` (alias indicator) or `@` (reserved indicator).
- test/main.nf: the HISAT2 index path is resolved from ${baseDir} instead of
  a personal home directory, so the test can run from any checkout.
- Blob ids on the `index` lines of the corrected files (meta.yml,
  test/main.nf) predate these corrections.

 tools/hisat2/main.nf              | 60 +++++++++++++++++++++++++++++++
 tools/hisat2/meta.yml             | 37 +++++++++++++++++++
 tools/hisat2/test/main.nf         | 34 ++++++++++++++++++
 tools/hisat2/test/nextflow.config |  2 ++
 4 files changed, 133 insertions(+)
 create mode 100644 tools/hisat2/main.nf
 create mode 100644 tools/hisat2/meta.yml
 create mode 100755 tools/hisat2/test/main.nf
 create mode 100644 tools/hisat2/test/nextflow.config

diff --git a/tools/hisat2/main.nf b/tools/hisat2/main.nf
new file mode 100644
index 00000000000..e9d33993ada
--- /dev/null
+++ b/tools/hisat2/main.nf
@@ -0,0 +1,60 @@
+nextflow.preview.dsl=2
+params.genome = ''
+
+process HISAT2 {
+    // depending on the genome used one might want/need to adjust the memory settings.
+    // For the E. coli test data this is probably not required
+    // label 'bigMem'
+    // label 'multiCore'
+
+    input:
+        tuple val(name), path(reads)
+        val (outdir)
+        val (hisat2_args)
+        val (verbose)
+
+    output:
+        path "*bam", emit: bam
+        path "*stats.txt", emit: stats
+
+    publishDir "$outdir/hisat2",
+        mode: "copy", overwrite: true
+
+    script:
+
+        if (verbose){
+            println ("[MODULE] HISAT2 ARGS: " + hisat2_args)
+        }
+
+        cores = 4
+        readString = ""
+        hisat_options = hisat2_args
+
+        // Options we add are
+        hisat_options = hisat_options + " --no-unal --no-softclip "
+
+        if (reads instanceof List) {
+            readString = "-1 "+reads[0]+" -2 "+reads[1]
+            hisat_options = hisat_options + " --no-mixed --no-discordant"
+        }
+        else {
+            readString = "-U "+reads
+        }
+        index = params.genome["hisat2"]
+
+        splices = ''
+        if (params.genome.containsKey("hisat2_splices")){
+            splices = " --known-splicesite-infile " + params.genome["hisat2_splices"]
+        }
+        else{
+            println ("No key 'hisat2_splices' was supplied. Skipping...")
+        }
+        hisat_name = name + "_" + params.genome["name"]
+
+        """
+        module load hisat2
+        module load samtools
+        hisat2 -p ${cores} ${hisat_options} -x ${index} ${splices} ${readString} 2>${hisat_name}_hisat2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${hisat_name}_hisat2.bam
+        """
+
+}
\ No newline at end of file
diff --git a/tools/hisat2/meta.yml b/tools/hisat2/meta.yml
new file mode 100644
index 00000000000..ee79ba998fb
--- /dev/null
+++ b/tools/hisat2/meta.yml
@@ -0,0 +1,37 @@
+name: HISAT2
+description: Graph-based alignment of next generation sequencing reads to a population of genomes
+keywords:
+    - Alignment
+    - Short reads
+    - graph FM Index (GFM)
+    - RNA-seq
+tools:
+    - hisat2:
+        description: |
+            HISAT2 is a fast and sensitive alignment program for mapping next-generation
+            sequencing reads (whole-genome, transcriptome, and exome sequencing data)
+            against the general human population (as well as against a single reference genome).
+            Based on GCSA (an extension of BWT for a graph) it is designed and implemented as a
+            graph FM index (GFM).
+        homepage: http://daehwankimlab.github.io/hisat2/
+        documentation: https://ccb.jhu.edu/software/hisat2/manual.shtml
+input:
+    -
+        - sample_id:
+            type: string
+            description: Sample identifier
+        - reads:
+            type: file
+            description: Input FastQ file, or pair of files
+output:
+    -
+        - report:
+            type: file
+            description: mapping statistics report
+            pattern: "*hisat2_stats.txt"
+        - alignment:
+            type: file
+            description: alignment file in BAM format
+            pattern: "*hisat2.bam"
+authors:
+    - "@FelixKrueger"
diff --git a/tools/hisat2/test/main.nf b/tools/hisat2/test/main.nf
new file mode 100755
index 00000000000..f846c94c508
--- /dev/null
+++ b/tools/hisat2/test/main.nf
@@ -0,0 +1,34 @@
+#!/usr/bin/env nextflow
+nextflow.preview.dsl=2
+
+params.outdir = "."
+params.genome = ""
+params.hisat2_args = ''
+// HISAT2 arguments should be supplied in the following format to work:
+// --hisat2_args="--score-min L,0,-0.8"
+
+params.verbose = false
+
+if (params.verbose){
+    println ("[WORKFLOW] HISAT2 ARGS ARE: " + params.hisat2_args)
+}
+// for other genomes this needs to be handled somehow to return all possible genomes
+genomeValues = ["name" : params.genome]
+genomeValues["hisat2"] = "${baseDir}/../../../test-datasets/indices/hisat2/E_coli/${params.genome}";
+
+include '../main.nf' params(genome: genomeValues)
+
+ch_read_files = Channel
+    .fromFilePairs('../../../test-datasets/Ecoli*{1,2}.fastq.gz',size:-1)
+    // .view() // to check whether the input channel works
+
+workflow {
+
+    main:
+        HISAT2(ch_read_files, params.outdir, params.hisat2_args, params.verbose)
+}
+
+
+
+
+
diff --git a/tools/hisat2/test/nextflow.config b/tools/hisat2/test/nextflow.config
new file mode 100644
index 00000000000..63c458cace0
--- /dev/null
+++ b/tools/hisat2/test/nextflow.config
@@ -0,0 +1,2 @@
+// docker.enabled = true
+params.outdir = './results'

From 23c2fd8520793b2961562992b044661fb135806d Mon Sep 17 00:00:00 2001
From: Phil Ewels
Date: Sat, 11 Jul 2020 13:27:56 +0200
Subject: [PATCH 3/4] Apply suggestions from code review

---
 tools/bowtie2/main.nf | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tools/bowtie2/main.nf b/tools/bowtie2/main.nf
index e5ebf47fc54..6d1dc55ee42 100644
--- a/tools/bowtie2/main.nf
+++ b/tools/bowtie2/main.nf
@@ -45,10 +45,7 @@ process BOWTIE2 {
         index = params.genome["bowtie2"]
         bowtie2_name = name + "_" + params.genome["name"]
 
-        println ("bowtie2 -x ${index} -p ${cores} ${bowtie2_options} ${readString} 2>${bowtie2_name}_bowtie2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${bowtie2_name}_bowtie2.bam")
         """
-        module load bowtie2
-        module load samtools
         bowtie2 -x ${index} -p ${cores} ${bowtie2_options} ${readString} 2>${bowtie2_name}_bowtie2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${bowtie2_name}_bowtie2.bam
         """
 

From 7ef2a978829274606974750e0d53578ceacd83e4 Mon Sep 17 00:00:00 2001
From: Phil Ewels
Date: Sat, 11 Jul 2020 13:28:41 +0200
Subject: [PATCH 4/4] Update tools/hisat2/main.nf

---
 tools/hisat2/main.nf | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/hisat2/main.nf b/tools/hisat2/main.nf
index e9d33993ada..55e6fe78e9e 100644
--- a/tools/hisat2/main.nf
+++ b/tools/hisat2/main.nf
@@ -52,9 +52,7 @@ process HISAT2 {
         hisat_name = name + "_" + params.genome["name"]
 
         """
-        module load hisat2
-        module load samtools
         hisat2 -p ${cores} ${hisat_options} -x ${index} ${splices} ${readString} 2>${hisat_name}_hisat2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${hisat_name}_hisat2.bam
         """
 
-}
\ No newline at end of file
+}