diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 97e344d..c40d642 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,7 @@ Changelog v2.3.1-dev ********** * **Breaking change**: Update the json output format +* **Breaking change**: Update snakemake to version 8.23 * Fix a bug with the Java runtime environment for Picard ********** diff --git a/Snakefile b/Snakefile index 1f212f2..3705d9c 100644 --- a/Snakefile +++ b/Snakefile @@ -141,7 +141,7 @@ use rule normalized_coverage from expression as expression_normalized_coverage w counts=align.module_output.counts, gtf=config["expression"]["gtf"], bed=config["expression"].get("bed", []), - src=srcdir("includes/expression/scripts/coverage.py"), + src=workflow.source_path("includes/expression/scripts/coverage.py"), rule create_summary: @@ -152,7 +152,7 @@ rule create_summary: snv_indels_json=align.module_output.json, itd_json=itd.module_output.json, expression_json=expression.module_output.json, - scr=srcdir("scripts/create_summary.py"), + scr=workflow.source_path("scripts/create_summary.py"), params: pipeline_ver=PIPELINE_VERSION, output: @@ -178,11 +178,12 @@ rule generate_report: """Generates a PDF report of the essential results.""" input: summary=rules.create_summary.output.js, - css=srcdir("report/assets/style.css"), - templates=srcdir("report/templates"), - imgs=srcdir("report/assets/img"), - toc=srcdir("report/assets/toc.xsl"), - scr=srcdir("scripts/generate_report.py"), + css=workflow.source_path("report/assets/style.css"), + toc=workflow.source_path("report/assets/toc.xsl"), + scr=workflow.source_path("scripts/generate_report.py"), + params: + templates="report/templates", + imgs="report/assets/img", output: "{sample}/hamlet_report.{sample}.pdf", log: @@ -192,8 +193,8 @@ rule generate_report: shell: """ python3 {input.scr} \ - --templates-dir {input.templates} \ - --imgs-dir {input.imgs} \ + --templates-dir {params.templates} \ + --imgs-dir {params.imgs} \ --css-path {input.css} \ --toc-path {input.toc} \ 
{input.summary} \ @@ -205,11 +206,12 @@ rule generate_html_report: """Generates a HTML report of the essential results, used for testing only""" input: summary=rules.create_summary.output.js, - css=srcdir("report/assets/style.css"), - templates=srcdir("report/templates"), - imgs=srcdir("report/assets/img"), - toc=srcdir("report/assets/toc.xsl"), - scr=srcdir("scripts/generate_report.py"), + css=workflow.source_path("report/assets/style.css"), + toc=workflow.source_path("report/assets/toc.xsl"), + scr=workflow.source_path("scripts/generate_report.py"), + params: + templates="report/templates", + imgs="report/assets/img", output: "{sample}/hamlet_report.{sample}.html", log: @@ -219,8 +221,8 @@ rule generate_html_report: shell: """ python3 {input.scr} \ - --templates-dir {input.templates} \ - --imgs-dir {input.imgs} \ + --templates-dir {params.templates} \ + --imgs-dir {params.imgs} \ --css-path {input.css} \ --toc-path {input.toc} \ {input.summary} \ @@ -233,7 +235,7 @@ rule multiqc: qc_stats=qc_seq.module_output.multiqc_files, snv_indel_stats=align.module_output.multiqc_files, expression_stats=expression.module_output.multiqc_files, - config=srcdir("cfg/multiqc.yml"), + config=workflow.source_path("cfg/multiqc.yml"), params: filelist="multiqc_filelist.txt", depth=2, diff --git a/docs/source/CHANGELOG.rst b/docs/source/CHANGELOG.rst deleted file mode 120000 index bfa394d..0000000 --- a/docs/source/CHANGELOG.rst +++ /dev/null @@ -1 +0,0 @@ -../../CHANGELOG.rst \ No newline at end of file diff --git a/docs/source/CHANGELOG.rst b/docs/source/CHANGELOG.rst new file mode 100644 index 0000000..c40d642 --- /dev/null +++ b/docs/source/CHANGELOG.rst @@ -0,0 +1,183 @@ +######### +Changelog +######### + +.. Newest changes should be on top. + +.. This document is user facing. Please word the changes in such a way +.. that users understand how the changes affect the new version. 
+ +********** +v2.3.1-dev +********** +* **Breaking change**: Update the json output format +* **Breaking change**: Update snakemake to version 8.23 +* Fix a bug with the Java runtime environment for Picard + +********** +v2.2.1 +********** + +Breaking changes +================ +* The `bed_variant_call_regions` option has been removed, variants are now + called for all genes present in the `gtf` file. +* Add graphviz/`dot` as a dependency (developer only). +* Please create a new HAMLET configuration file with the `create-config.py` script. +* To use the latest hotspot regions and artifact blacklist, please recreate the + HAMLET reference data. + +Novel module +============ +* Add novel module, **expression**, which analyzes gene expression. + * Add optional input `strandedness` to the sample configuration. + * Add json output file for the expression module. + +Bugfixes +======== +* Fix a rare bug where different modules use the same MultiQC file list. +* Fix a bug with filtering VEP records that contain multiple population + frequency records for a single variant. + +Updates +======= +* Add ability to generate configurations for each module using the + `utilities/create-config.py` script. +* Update the hotspot regions reference file. +* Update the blacklist of known artifacts. +* Remove various superfluous plots from the MultiQC report. + +********** +v2.1.3 +********** +* Add `pysam` to the conda environment (developer only) +* Add exon number to variant table +* Add chromosomes to fusion table + +********** +v2.1.2 +********** +* Set the maximum population frequency to 1%. 
This was accidentally set to 5% in v2.1.1 + +********** +v2.1.1 +********** + +Bugfixes +======== +* Fix a bug where VEP removed rare variants + +********** +v2.1.0 +********** + +Breaking changes +================ +* Remove the JSON output for the qc-seq module (this has been replaced by a + MultiQC report) +* Add sample name to STAR counts table + +Changes +======= +* Automatically remove _STAR temporary folders +* Modified PDF formatting + * Change cover image + * Add bookmarks under chapter variant + * Sort the genes of interest alphabetically + * Remove the "Sequencing Results" section from the report (this has been replaced by a MultiQC report) +* Replace FastQC with Sequali + +Bugfixes +======== +* Fix a bug where the trimmed FastQ files are not removed when no longer needed + +Updates +======= +* Update Cutadapt to 4.6 +* Update MultiQC to 1.22 +* Update snakefmt to 0.10.0 (developer only) +* Update black to 24.3.0 (developer only) + + +********** +v2.0.5 +********** +* Increase space for the HGVS description in "Results Overview" table + +********** +v2.0.4 +********** +* Automatically check the release tag is set correctly + +********** +v2.0.3 +********** + +Bugfixes +======== +* Fix a bug where long HGVS descriptions make the "Results Overview" table overflow the page + +********** +v2.0.2 +********** +* Include the sample name in the final BAM file + +********** +v2.0.1 +********** + +Bugfixes +======== +* Update version number in HAMLET report + +********** +v2.0.0 +********** + +Bugfixes +======== +* Fix a bug with inconsistent config setting 'blacklist' in snv-indels +* Fix a bug where unmapped reads are not included in STAR output file + +Tool changes +============ +* Replace StarFusion and FusionCatcher with Arriba +* Replace VarScan variant caller with VarDict +* Replace GSNAP aligner with STAR + +Tool updates +============ +* Update VEP to 108.2 +* Update Picard to 2.27.4 +* Update FastQC to 0.11.9 +* Update Cutadapt to 4.1 + +Speed improvements 
+================== +* Use multiple threads for Cutadapt, and reduce the compression of output files + +Changes +======= +* Remove run name from the report +* Deprecate option `fusion*partners`, in favour of `report_genes`, which points + to a list of fusion genes to report +* Show allele frequency as a percentage in the pdf report +* Add additional genes of interest + - SRSF2 + - SF3B1 + - U2AF1 + - BCOR + - STAG2 + - ZRSR2 + - EZH2 +* Filter fusion results based on fusion partners +* Add fusion plots from Arriba +* Add default blacklist with common false-positive variants +* Add support for variant blacklist in VEP hgvsc format +* Add script to generate a configuration file +* Add pipeline to generate reference files +* Add per-module configuration options +* Add support for PEP sample configuration +* Add support for Snakemake 7.8.5 +* Remove variants plots +* Use MANE select transcript for all genes diff --git a/environment.yml b/environment.yml index 6b90456..28a9a03 100644 --- a/environment.yml +++ b/environment.yml @@ -6,13 +6,13 @@ channels: - bioconda - conda-forge dependencies: - - bs4=4.11 - - peppy>=0.35 - - pytest-workflow>=1.6.0 - - snakemake-minimal=7.32 - - snakefmt=0.10.0 + - bs4=4.12 + - peppy=0.40.7 + - pytest-workflow=2.1.0 + - snakemake-minimal=8.24.1 + - snakefmt=0.10.2 + - sphinx=8.1.3 + - sphinx-rtd-theme=3.0.0 - black=24.3.0 - - sphinx=7.1.2 - - sphinx-rtd-theme - pysam=0.22 - graphviz diff --git a/includes/expression/Snakefile b/includes/expression/Snakefile index 8b595d0..5e08523 100644 --- a/includes/expression/Snakefile +++ b/includes/expression/Snakefile @@ -14,7 +14,9 @@ rule normalized_coverage: counts=get_counts, gtf=config["gtf"], bed=config.get("bed", []), - src=srcdir("scripts/coverage.py"), + src=workflow.source_path("scripts/coverage.py"), + # Needed to localize the gtf script to the cache + utils=workflow.source_path("scripts/gtf.py"), params: housekeeping=config["housekeeping"], genes_of_interest=config["genes_of_interest"], 
@@ -55,7 +57,7 @@ rule transform_counts: """Transform the counts table to use with seAMLess""" input: counts=get_counts, - src=srcdir("scripts/transform_counts.py"), + src=workflow.source_path("scripts/transform_counts.py"), output: tsv="{sample}/expression/seAMLess.tsv", params: @@ -79,7 +81,7 @@ rule json_output: input: coverage=module_output.coverage, norm_coverage=module_output.normalized_expression, - src=srcdir("scripts/json-output.py"), + src=workflow.source_path("scripts/json-output.py"), params: strandedness=get_strand, genes=config["report"], @@ -104,7 +106,7 @@ rule json_output: rule merge_samples: input: counts=[module_output.normalized_expression(sample) for sample in samples], - src=srcdir("scripts/multiqc.py"), + src=workflow.source_path("scripts/multiqc.py"), params: samples=[sample.sample for sample in samples], strandedness=[get_strand(sample) for sample in samples], @@ -129,7 +131,7 @@ rule merge_samples: rule multiqc: input: stats=module_output.multiqc_files, - config=srcdir("../../cfg/multiqc.yml"), + config=workflow.source_path("../../cfg/multiqc.yml"), params: filelist="multiqc_filelist_expression.txt", depth=2, diff --git a/includes/fusion/Snakefile b/includes/fusion/Snakefile index d5d479c..a2a9016 100644 --- a/includes/fusion/Snakefile +++ b/includes/fusion/Snakefile @@ -41,8 +41,8 @@ rule arriba: rule filter_fusions: input: fusions="{sample}/fusion/arriba/fusions.raw.tsv", - to_json=srcdir("scripts/arriba2json.py"), - to_tsv=srcdir("scripts/json2arriba.py"), + to_json=workflow.source_path("scripts/arriba2json.py"), + to_tsv=workflow.source_path("scripts/json2arriba.py"), report_genes=config["report_genes"], output: json="{sample}/fusion/arriba/fusions.json", @@ -109,7 +109,7 @@ rule json_output: input: json="{sample}/fusion/arriba/fusions.json", plots="{sample}/fusion/arriba/plots", - src=srcdir("scripts/json-output.py"), + src=workflow.source_path("scripts/json-output.py"), output: json="{sample}/fusion/fusion-output.json", log: diff 
--git a/includes/itd/Snakefile b/includes/itd/Snakefile index 66eb1fe..43ca292 100644 --- a/includes/itd/Snakefile +++ b/includes/itd/Snakefile @@ -153,7 +153,7 @@ rule json_output: flt3_plot="{sample}/itd/{sample}.flt3.png", kmt2a_csv="{sample}/itd/{sample}.kmt2a.csv", kmt2a_plot="{sample}/itd/{sample}.kmt2a.png", - src=srcdir("scripts/json-output.py"), + src=workflow.source_path("scripts/json-output.py"), output: "{sample}/itd/itd-output.json", log: diff --git a/includes/qc-seq/Snakefile b/includes/qc-seq/Snakefile index 7a1762a..9f3a87e 100644 --- a/includes/qc-seq/Snakefile +++ b/includes/qc-seq/Snakefile @@ -71,7 +71,7 @@ rule sequali: rule multiqc: input: stats=module_output.multiqc_files, - config=srcdir("../../cfg/multiqc.yml"), + config=workflow.source_path("../../cfg/multiqc.yml"), params: filelist="multiqc_filelist_qc_seq.txt", depth=2, diff --git a/includes/snv-indels/Snakefile b/includes/snv-indels/Snakefile index 4c7d742..9273595 100644 --- a/includes/snv-indels/Snakefile +++ b/includes/snv-indels/Snakefile @@ -34,7 +34,7 @@ rule call_regions: input: fasta=config["genome_fasta"], gtf=config["gtf"], - src=srcdir("scripts/create_bed.sh"), + src=workflow.source_path("scripts/create_bed.sh"), output: # Intermediate files from the bash script chroms=temporary("chroms.txt"), @@ -184,7 +184,7 @@ rule exon_cov: bed=".tmp.exon_cov_ref.bed", genome=".tmp.genome.txt", idm=config["ref_id_mapping"], - scr=srcdir("scripts/aggr_exon_cov.py"), + scr=workflow.source_path("scripts/aggr_exon_cov.py"), output: json="{sample}/snv-indels/{sample}.exon_cov_stats.json", log: @@ -405,7 +405,7 @@ rule filter_vep: vep="{sample}/snv-indels/{sample}.vep.txt.gz", ref_id_mapping=config["ref_id_mapping"], hotspots="{sample}/snv-indels/{sample}.hotspot.vcf", - scr=srcdir("scripts/filter_vep.py"), + scr=workflow.source_path("scripts/filter_vep.py"), blacklist=config.get("blacklist", []), params: vep_consequences=config["vep_include_consequence"], @@ -442,7 +442,7 @@ rule json_output: 
insert_stats="{sample}/snv-indels/{sample}.insert_stats", exon_cov_stats="{sample}/snv-indels/{sample}.exon_cov_stats.json", vep_stats="{sample}/snv-indels/{sample}.vep_stats.txt", - src=srcdir("scripts/json-output.py"), + src=workflow.source_path("scripts/json-output.py"), output: "{sample}/snv-indels/snv-indels-output.json", log: @@ -466,7 +466,7 @@ rule json_output: rule multiqc: input: stats=module_output.multiqc_files, - config=srcdir("../../cfg/multiqc.yml"), + config=workflow.source_path("../../cfg/multiqc.yml"), params: filelist="multiqc_filelist_snv_indels.txt", depth=2, diff --git a/test/config.yaml b/test/config.yaml index 8864f9a..7b9066e 100644 --- a/test/config.yaml +++ b/test/config.yaml @@ -1,9 +1,8 @@ use-singularity: True -singularity-args: '--cleanenv --bind /tmp' +singularity-args: '--cleanenv --bind /tmp,/home' singularity-prefix: '~/.singularity/cache/snakemake' printshellcmds: True verbose: True -reason: True cores: 1 keep-incomplete: True keep-going: True diff --git a/test/test_hamlet.yml b/test/test_hamlet.yml index 25af1fc..28e16bd 100644 --- a/test/test_hamlet.yml +++ b/test/test_hamlet.yml @@ -205,7 +205,6 @@ command: > bash -c " snakemake \ - --dry-run \ --snakefile Snakefile \ --workflow-profile test \ --configfile test/data/config/hamlet-dry.json \ diff --git a/test/test_utilities.yml b/test/test_utilities.yml index 5626a00..68f0743 100644 --- a/test/test_utilities.yml +++ b/test/test_utilities.yml @@ -39,9 +39,7 @@ command: > snakemake -n --snakefile utilities/deps/Snakefile - --reason - --printshellcmds - --verbose + --workflow-profile test stdout: contains: - rule arriba_database diff --git a/utilities/deps/Snakefile b/utilities/deps/Snakefile index 31a7b00..bff75f5 100644 --- a/utilities/deps/Snakefile +++ b/utilities/deps/Snakefile @@ -103,7 +103,7 @@ rule rewrite_gtf: """ Add the 'chr' prefix to the chromosome names """ input: gtf=rules.download_gtf.output, - rewrite=srcdir("scripts/rewrite-gtf.py"), + 
rewrite=workflow.source_path("scripts/rewrite-gtf.py"), output: gtf=gtf_renamed, singularity: @@ -170,7 +170,7 @@ rule create_refflat: rule rewrite_refflat: input: gtf=rules.create_refflat.output, - scr=srcdir("scripts/rewrite-refflat.py"), + scr=workflow.source_path("scripts/rewrite-refflat.py"), output: "ucsc_gencode.refFlat", singularity: @@ -183,7 +183,7 @@ rule rewrite_refflat: rule create_id_mappings: input: - srcdir("small-files/id_mappings.tsv"), + workflow.source_path("small-files/id_mappings.tsv"), output: "id_mappings.tsv", singularity: @@ -196,7 +196,7 @@ rule create_id_mappings: rule create_blacklist: input: - srcdir("small-files/blacklist.txt"), + workflow.source_path("small-files/blacklist.txt"), output: "blacklist.txt", singularity: @@ -238,7 +238,7 @@ rule create_rRNA_refflat: rule genome_hotspots: input: - srcdir("small-files/hotspots_genome.bed"), + workflow.source_path("small-files/hotspots_genome.bed"), output: "hotspots_genome.bed", singularity: @@ -252,7 +252,7 @@ rule genome_hotspots: rule create_report_genes: input: folder="arriba", - genes=srcdir("small-files/report_genes.txt"), + genes=workflow.source_path("small-files/report_genes.txt"), output: "arriba/report_genes.txt", singularity: @@ -293,7 +293,7 @@ rule unpack_vep_cache: rule copy_itd: """All the needed files are in small-files/itd, so we just copy them""" input: - fasta=srcdir("small-files/itd/itd_genes.fa"), + fasta=workflow.source_path("small-files/itd/itd_genes.fa"), output: itd=directory("itd"), singularity: