some more fixes to get the pipeline running again

nf-core · Feb 22, 2024 · 87797f3 · 87797f3
1 parent 8e8f9fe
commit 87797f3
Show file tree

Hide file tree

Showing 6 changed files with 63 additions and 45 deletions.
diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -9,18 +9,21 @@
         "properties": {
             "test_vcf": {
                 "type": "string",
-                "pattern": "",
+                "pattern": "\\S+\\.vcf(\\.gz)?$",
+                "format": "file-path",
                 "errorMessage": "Test VCF must be provided, cannot contain spaces and must have extension '.vcf' or '.vcf.gz'"
             },
             "caller": {
                 "type": "string",
                 "pattern": "^\\S+$",
-                "errorMessage": "Name of the variant caller used to generate test file"
+                "errorMessage": "Name of the variant caller used to generate test file",
+                "meta": ["caller"]
             },
             "vartype": {
                 "type": "string",
                 "pattern": "^\\S+$",
-                "errorMessage": "Variant type to benchmark"
+                "errorMessage": "Variant type to benchmark",
+                "meta": ["vartype"]
             }
         },
         "required": ["test_vcf", "caller", "vartype"]

diff --git a/conf/test.config b/conf/test.config
@@ -22,9 +22,12 @@ params {
     // Input data
     sample               = "HG002"
     input                = 'assets/samplesheet.csv'
+    fasta                = 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta'
+    fai                  = 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai'
 
     // Genome references
     genome               = 'hg38'
+    igenomes_ignore      = false
 
     // Processes
     analysis             = 'germline'

diff --git a/main.nf b/main.nf
@@ -11,6 +11,16 @@
 
 nextflow.enable.dsl = 2
 
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    GENOME PARAMETER VALUES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+include { getGenomeAttribute      } from './subworkflows/local/utils_nfcore_variantbenchmarking_pipeline'
+
+params.fasta = getGenomeAttribute('fasta')
+params.fai   = getGenomeAttribute('fai')
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
@@ -21,19 +31,30 @@ include { VARIANTBENCHMARKING     } from './workflows/variantbenchmarking'
 include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_variantbenchmarking_pipeline'
 include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_nfcore_variantbenchmarking_pipeline'
 
-include { getGenomeAttribute      } from './subworkflows/local/utils_nfcore_variantbenchmarking_pipeline'
-
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    GENOME PARAMETER VALUES
+    NAMED WORKFLOW FOR PIPELINE
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-// TODO nf-core: Remove this line if you don't need a FASTA file
-//   This is an example of how to use getGenomeAttribute() to fetch parameters
-//   from igenomes.config using `--genome`
-params.fasta = getGenomeAttribute('fasta')
-params.fai   = getGenomeAttribute('fai')
+// WORKFLOW: Run main nf-core/variantbenchmarking analysis pipeline
+workflow NFCORE_VARIANTBENCHMARKING {
+
+    take:
+    ch_samplesheet
+
+    main:
+
+    //
+    // WORKFLOW: Run pipeline
+    //
+    VARIANTBENCHMARKING(
+        ch_samplesheet
+    )
+
+    emit:
+    multiqc_report = VARIANTBENCHMARKING.out.multiqc_report // channel: /path/to/multiqc_report.html
+}
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -96,6 +96,7 @@
             "type": "object",
             "fa_icon": "fas fa-dna",
             "description": "Reference genome related files and options required for the workflow.",
+            "required": ["fasta", "fai"],
             "properties": {
                 "genome": {
                     "type": "string",

diff --git a/subworkflows/local/utils_nfcore_variantbenchmarking_pipeline/main.nf b/subworkflows/local/utils_nfcore_variantbenchmarking_pipeline/main.nf
@@ -82,22 +82,6 @@ workflow PIPELINE_INITIALISATION {
     //
     Channel
         .fromSamplesheet("input")
-        .map {
-            meta, fastq_1, fastq_2 ->
-                if (!fastq_2) {
-                    return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
-                } else {
-                    return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
-                }
-        }
-        .groupTuple()
-        .map {
-            validateInputSamplesheet(it)
-        }
-        .map {
-            meta, fastqs ->
-                return [ meta, fastqs.flatten() ]
-        }
         .set { ch_samplesheet }
 
     emit:

diff --git a/workflows/variantbenchmarking.nf b/workflows/variantbenchmarking.nf
@@ -1,20 +1,3 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'
-
-def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
-def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
-def summary_params = paramsSummaryMap(workflow)
-
-// Print parameter summary log to screen
-log.info logo + paramsSummaryLog(workflow) + citation
-
-WorkflowVariantbenchmarking.initialise(params, log)
-
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     CONFIG FILES
@@ -40,6 +23,13 @@ include { paramsSummaryMap       } from 'plugin/nf-validation'
 include { paramsSummaryMultiqc   } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_variantbenchmarking_pipeline'
+include { SOMATIC_BENCHMARK        } from '../subworkflows/local/somatic_benchmark'
+include { SV_GERMLINE_BENCHMARK    } from '../subworkflows/local/sv_germline_benchmark'
+include { PREPARE_VCFS_TRUTH       } from '../subworkflows/local/prepare_vcfs_truth'
+include { PREPARE_VCFS_TEST        } from '../subworkflows/local/prepare_vcfs_test'
+include { SV_VCF_CONVERSIONS       } from '../subworkflows/local/sv_vcf_conversion'
+include { REPORT_VCF_STATISTICS as REPORT_STATISTICS_TEST } from '../subworkflows/local/report_vcf_statistics'
+include { REPORT_VCF_STATISTICS as REPORT_STATISTICS_TRUTH } from '../subworkflows/local/report_vcf_statistics'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -54,7 +44,23 @@ workflow VARIANTBENCHMARKING {
 
     main:
 
-    ch_versions = Channel.empty()
+    ch_versions      = Channel.empty()
+    ch_multiqc_files = Channel.empty()
+
+    // check mandatory parameters
+    println(params.fasta)
+    println(params.fai)
+    ref         = Channel.fromPath([params.fasta,params.fai], checkIfExists: true).collect()
+
+    // check high confidence files
+
+    truth       = params.truth              ? Channel.fromPath(params.truth, checkIfExists: true).collect()
+                                            : Channel.empty()
+
+    high_conf   = params.high_conf          ? Channel.fromPath(params.high_conf, checkIfExists: true).collect()
+                                            : Channel.empty()
+
+    // TODO: GET FILES FROM IGENOMES ACCORDING TO META.ID
 
     ch_samplesheet.branch{
             sv:  it[0].vartype == "sv"