Skip to content

Commit

Permalink
1.2.2
Browse files Browse the repository at this point in the history
  • Loading branch information
tdayris committed Jul 10, 2024
1 parent 8091485 commit af1833a
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 315 deletions.
2 changes: 1 addition & 1 deletion .test/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ snakefiles := \


python_scripts := \
"../workflow/scripts/bioinfokit_pca.py" \
"../workflow/scripts/build_datavzrd_yaml.py" \
"../workflow/scripts/fair_rnaseq_salmon_quant_multiqc_config.py" \
"../workflow/scripts/in_house_pca.py" \
"../workflow/scripts/in_house_qc_mt_ribo_percents.py" \
"../workflow/scripts/merge_salmon_quant.py" \
"../workflow/scripts/qc_table.py" \
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# 1.2.2

## Features:

* Snakemake wrappers up to 3.13.6
* fair_genome_indexer up to 3.8.0
* fair_fastqc_multiqc up to 2.3.5

# 1.2.1

## Features:
Expand Down
140 changes: 8 additions & 132 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -111,142 +111,22 @@ def lookup_genomes(
"""
Run lookup function with default parameters in order to search user-provided sequence/annotation files
"""
query: str = (
query = str(
"species == '{wildcards.species}' & build == '{wildcards.build}' & release == '{wildcards.release}'".format(
wildcards=wildcards
)
)
return getattr(lookup(query=query, within=genomes), key, default)


def get_dna_fasta(
    wildcards: snakemake.io.Wildcards, genomes: pandas.DataFrame = genomes
) -> str:
    """
    Resolve the DNA fasta file matching the species/build/release wildcards.

    The `dna_fasta` attribute is searched in `genomes` through
    `lookup_genomes`; the conventional path under `reference/sequences/`
    is handed over as the fallback value.
    """
    fallback_template = "reference/sequences/{wildcards.species}.{wildcards.build}.{wildcards.release}.dna.fasta"
    return lookup_genomes(
        wildcards,
        key="dna_fasta",
        default=fallback_template.format(wildcards=wildcards),
        genomes=genomes,
    )


def get_cdna_fasta(
    wildcards: snakemake.io.Wildcards, genomes: pandas.DataFrame = genomes
) -> str:
    """
    Resolve the cDNA fasta file matching the species/build/release wildcards.

    The `cdna_fasta` attribute is searched in `genomes` through
    `lookup_genomes`; the conventional path under `reference/sequences/`
    is handed over as the fallback value.
    """
    fallback_template = "reference/sequences/{wildcards.species}.{wildcards.build}.{wildcards.release}.cdna.fasta"
    return lookup_genomes(
        wildcards,
        key="cdna_fasta",
        default=fallback_template.format(wildcards=wildcards),
        genomes=genomes,
    )


def get_transcripts_fasta(
    wildcards: snakemake.io.Wildcards, genomes: pandas.DataFrame = genomes
) -> str:
    """
    Resolve the transcripts fasta file matching the species/build/release wildcards.

    The `transcripts_fasta` attribute is searched in `genomes` through
    `lookup_genomes`; the conventional path under `reference/sequences/`
    is handed over as the fallback value.
    """
    fallback_template = "reference/sequences/{wildcards.species}.{wildcards.build}.{wildcards.release}.transcripts.fasta"
    return lookup_genomes(
        wildcards,
        key="transcripts_fasta",
        default=fallback_template.format(wildcards=wildcards),
        genomes=genomes,
    )


def select_fasta(
    wildcards: snakemake.io.Wildcards, genomes: pandas.DataFrame = genomes
) -> str:
    """
    Evaluate the {datatype} wildcard and return the matching fasta file.

    Parameters:
    wildcards (snakemake.io.Wildcards): Must expose `datatype`, plus the
                                        `species`, `build` and `release`
                                        attributes used by the getters.
    genomes   (pandas.DataFrame)      : Genome table forwarded to the getters.

    Return (str): Result of `branch` over the "dna", "cdna" and
                  "transcripts" cases, selected by the lower-cased
                  `datatype` wildcard.
    """
    # Forward `genomes` explicitly: without it, a caller-provided table was
    # silently ignored and the getters always used their own default.
    return branch(
        condition=str(wildcards.datatype).lower(),
        cases={
            "dna": get_dna_fasta(wildcards, genomes=genomes),
            "cdna": get_cdna_fasta(wildcards, genomes=genomes),
            "transcripts": get_transcripts_fasta(wildcards, genomes=genomes),
        },
    )


def get_dna_fai(
    wildcards: snakemake.io.Wildcards, genomes: pandas.DataFrame = genomes
) -> str:
    """
    Resolve the DNA fasta index (.fai) matching the species/build/release wildcards.

    The `dna_fai` attribute is searched in `genomes` through
    `lookup_genomes`; the conventional path under `reference/sequences/`
    is handed over as the fallback value.
    """
    fallback_template = "reference/sequences/{wildcards.species}.{wildcards.build}.{wildcards.release}.dna.fasta.fai"
    return lookup_genomes(
        wildcards,
        key="dna_fai",
        default=fallback_template.format(wildcards=wildcards),
        genomes=genomes,
    )


def get_cdna_fai(
    wildcards: snakemake.io.Wildcards, genomes: pandas.DataFrame = genomes
) -> str:
    """
    Resolve the cDNA fasta index (.fai) matching the species/build/release wildcards.

    The `cdna_fai` attribute is searched in `genomes` through
    `lookup_genomes`; the conventional path under `reference/sequences/`
    is handed over as the fallback value.
    """
    fallback_template = "reference/sequences/{wildcards.species}.{wildcards.build}.{wildcards.release}.cdna.fasta.fai"
    return lookup_genomes(
        wildcards,
        key="cdna_fai",
        default=fallback_template.format(wildcards=wildcards),
        genomes=genomes,
    )


def get_transcripts_fai(
    wildcards: snakemake.io.Wildcards, genomes: pandas.DataFrame = genomes
) -> str:
    """
    Resolve the transcripts fasta index (.fai) matching the species/build/release wildcards.

    The `transcripts_fai` attribute is searched in `genomes` through
    `lookup_genomes`; the conventional path under `reference/sequences/`
    is handed over as the fallback value.
    """
    fallback_template = "reference/sequences/{wildcards.species}.{wildcards.build}.{wildcards.release}.transcripts.fasta.fai"
    return lookup_genomes(
        wildcards,
        key="transcripts_fai",
        default=fallback_template.format(wildcards=wildcards),
        genomes=genomes,
    )


def select_fai(
    wildcards: snakemake.io.Wildcards, genomes: pandas.DataFrame = genomes
) -> str:
    """
    Evaluate the {datatype} wildcard and return the matching fasta index file.

    Parameters:
    wildcards (snakemake.io.Wildcards): Must expose `datatype`, plus the
                                        `species`, `build` and `release`
                                        attributes used by the getters.
    genomes   (pandas.DataFrame)      : Genome table forwarded to the getters.

    Return (str): Result of `branch` over the "dna", "cdna" and
                  "transcripts" cases, selected by the lower-cased
                  `datatype` wildcard.
    """
    # Forward `genomes` explicitly: without it, a caller-provided table was
    # silently ignored and the getters always used their own default.
    return branch(
        condition=str(wildcards.datatype).lower(),
        cases={
            "dna": get_dna_fai(wildcards, genomes=genomes),
            "cdna": get_cdna_fai(wildcards, genomes=genomes),
            "transcripts": get_transcripts_fai(wildcards, genomes=genomes),
        },
    )


def get_gtf(
wildcards: snakemake.io.Wildcards, genomes: pandas.DataFrame = genomes
) -> str:
"""
Return path to the final genome annotation
"""
default: str = (
"reference/annotation/{wildcards.species}.{wildcards.build}.{wildcards.release}.gtf".format(
default = str(
"reference/annotation/{wildcards.species}.{wildcards.build}.{wildcards.release}/{wildcards.species}.{wildcards.build}.{wildcards.release}.gtf".format(
wildcards=wildcards
)
)
Expand All @@ -259,8 +139,8 @@ def get_tx2gene(
"""
Return path to final tx2gene table
"""
default: str = (
"reference/annotation/{wildcards.species}.{wildcards.build}.{wildcards.release}.t2g.tsv".format(
default = str(
"reference/annotation/{wildcards.species}.{wildcards.build}.{wildcards.release}/{wildcards.species}.{wildcards.build}.{wildcards.release}.t2g.tsv".format(
wildcards=wildcards
)
)
Expand All @@ -273,8 +153,8 @@ def get_id2gene(
"""
Return path to final id2gene table
"""
default: str = (
"reference/annotation/{wildcards.species}.{wildcards.build}.{wildcards.release}.id_to_gene.tsv".format(
default = str(
"reference/annotation/{wildcards.species}.{wildcards.build}.{wildcards.release}/{wildcards.species}.{wildcards.build}.{wildcards.release}.id_to_gene.tsv".format(
wildcards=wildcards
)
)
Expand Down Expand Up @@ -374,11 +254,7 @@ def get_salmon_quant_reads_input(

results: dict[str, str | list[str]] = {
"index": ancient(salmon_index),
"gtf": lookup_genomes(
wildcards=wildcards,
key="gtf",
default=f"reference/annotation/{species}.{build}.{release}.gtf",
),
"gtf": get_gtf(wildcards),
}

if downstream_file or not pandas.isna(downstream_file):
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/fair_fastqc_multiqc_pipeline.smk
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module fair_fastqc_multiqc:
snakefile:
github("tdayris/fair_fastqc_multiqc", path="workflow/Snakefile", tag="2.2.8")
github("tdayris/fair_fastqc_multiqc", path="workflow/Snakefile", tag="2.3.5")
config:
config

Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/fair_genome_indexer_pipeline.smk
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module fair_genome_indexer:
snakefile:
github("tdayris/fair_genome_indexer", path="workflow/Snakefile", tag="3.5.0")
github("tdayris/fair_genome_indexer", path="workflow/Snakefile", tag="3.8.0")
config:
config

Expand Down
58 changes: 1 addition & 57 deletions workflow/rules/salmon_quant_meta.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,62 +5,6 @@ module salmon_tximport:
config


# Re-use the `salmon_decoy_sequences` rule of the `salmon_tximport` module under a
# pipeline-specific name, overriding its input, output, threads, resources, log
# and benchmark sections.
use rule salmon_decoy_sequences from salmon_tximport as fair_rnaseq_salmon_quant_salmon_decoy_sequences with:
input:
# Transcripts and genome fasta paths are resolved from the wildcards by helper functions.
transcriptome=lambda wildcards: get_transcripts_fasta(wildcards),
genome=lambda wildcards: get_dna_fasta(wildcards),
output:
# Both outputs are wrapped in temp().
gentrome=temp("reference/sequences/{species}.{build}.{release}.gentrome.fasta"),
decoys=temp("reference/sequences/{species}.{build}.{release}.decoys.txt"),
threads: 2
resources:
# Memory and runtime requests are multiplied by the attempt number.
mem_mb=lambda wildcards, attempt: 512 * attempt,
runtime=lambda wildcards, attempt: 25 * attempt,
# `tmp` is not defined in this excerpt.
tmpdir=tmp,
log:
"logs/fair_rnaseq_salmon_quant_salmon_decoy_sequences/{species}.{build}.{release}.log",
benchmark:
"benchmark/fair_rnaseq_salmon_quant_salmon_decoy_sequences/{species}.{build}.{release}.tsv"


# Re-use the `salmon_index_gentrome` rule of the `salmon_tximport` module under a
# pipeline-specific name, overriding its input, output, threads, resources, log,
# benchmark and params sections.
use rule salmon_index_gentrome from salmon_tximport as fair_rnaseq_salmon_quant_salmon_index_gentrome with:
input:
# Same paths as the outputs declared by the decoy-sequences rule.
sequences="reference/sequences/{species}.{build}.{release}.gentrome.fasta",
decoys="reference/sequences/{species}.{build}.{release}.decoys.txt",
output:
# One output per listed file name, all under the same index directory prefix
# and all wrapped in temp().
temp(
multiext(
"reference/salmon_index/{species}.{build}.{release}/{species}.{build}.{release}/",
"complete_ref_lens.bin",
"ctable.bin",
"ctg_offsets.bin",
"duplicate_clusters.tsv",
"info.json",
"mphf.bin",
"pos.bin",
"pre_indexing.log",
"rank.bin",
"refAccumLengths.bin",
"ref_indexing.log",
"reflengths.bin",
"refseq.bin",
"seq.bin",
"versionInfo.json",
)
),
threads: 20
resources:
# Memory (48 * 1024 per attempt) and runtime requests are multiplied by the attempt number.
mem_mb=lambda wildcards, attempt: 48 * 1024 * attempt,
runtime=lambda wildcards, attempt: 50 * attempt,
# `tmp` is not defined in this excerpt.
tmpdir=tmp,
log:
"logs/fair_rnaseq_salmon_quant_salmon_index_gentrome/{species}.{build}.{release}.log",
benchmark:
"benchmark/fair_rnaseq_salmon_quant_salmon_index_gentrome/{species}.{build}.{release}.tsv"
params:
# Extra arguments are read from the configuration at "params/salmon/index"; empty string when absent.
extra=lookup_config(dpath="params/salmon/index", default=""),


use rule salmon_quant_reads from salmon_tximport as fair_rnaseq_salmon_quant_salmon_quant_reads with:
input:
unpack(get_salmon_quant_reads_input),
Expand Down Expand Up @@ -161,7 +105,7 @@ use rule tximport from salmon_tximport as fair_rnaseq_salmon_quant_tximport with
),
),
tx_to_gene=expand(
"reference/annotation/{genome.species}.{genome.build}.{genome.release}.id_to_gene.tsv",
"reference/annotation/{genome.species}.{genome.build}.{genome.release}/{genome.species}.{genome.build}.{genome.release}.id_to_gene.tsv",
genome=lookup(
query="species == '{species}' & build == '{build}' & release == '{release}'",
within=genomes,
Expand Down
Loading

0 comments on commit af1833a

Please sign in to comment.