Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tools/nextdenovo #1515

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 2 additions & 10 deletions tools/nextdenovo/macros.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<macros>
<token name="@TOOL_VERSION@">2.5.0</token>
<token name="@TOOL_VERSION@">2.5.2</token>
<token name="@VERSION_SUFFIX@">0</token>
<xml name="requirements">
<requirements>
Expand All @@ -13,15 +13,7 @@
</xml>
<xml name="citations">
<citations>
<citation type="bibtex">
@misc{githubNextDenovo,
author = {Jiang, Hu},
year = {2022},
title = {NextDenovo},
publisher = {GitHub},
journal = {GitHub repository},
url = {https://github.com/Nextomics/NextDenovo}}
</citation>
<citation type="doi">10.1186/s13059-024-03252-4</citation>
</citations>
</xml>
</macros>
43 changes: 19 additions & 24 deletions tools/nextdenovo/nextdenovo.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<import>macros.xml</import>
</macros>
<expand macro="biotools"/>
<expand macro="requirements" />
<expand macro="requirements"/>
<version_command>nextDenovo --version</version_command>
<command detect_errors="exit_code"><![CDATA[
mkdir -p './read_files' &&
Expand Down Expand Up @@ -49,7 +49,7 @@
]]></configfile>
</configfiles>
<inputs>
<param name="input_reads" type="data" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz" multiple="true" label="Sequence reads"/>
<param name="input_reads" type="data" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Sequence reads" multiple="true"/>
<param name="task" type="select" label="Task">
<option value="all">All</option>
<option value="correct">Correct: only do the correction step</option>
Expand All @@ -71,60 +71,55 @@
<option value="seed">Seed cutoff</option>
</param>
<when value="genome">
<param name="genome_size" type="text" value="" optional="true" label="Estimated genome size" help="Estimated genome size, suffix K/M/G recognized, used to
calculate seed_cutoff/seed_cutfiles/blocksize and average depth, it can be omitted when manually setting seed_cutoff. Spaces are not allowed.">
<param name="genome_size" type="text" value="" optional="true" label="Estimated genome size" help="Estimated genome size, suffix K/M/G recognized, used to calculate seed_cutoff/seed_cutfiles/blocksize and average depth, it can be omitted when manually setting seed_cutoff. Spaces are not allowed.">
<!-- Drop every character that is not a letter, a digit or "." from the submitted value. -->
<sanitizer invalid_char="">
<valid initial="string.letters,string.digits">
<add value="."/>
</valid>
</sanitizer>
<!-- Galaxy regex validators only match at the start of the value; the trailing $ forces the whole value to consist of digits, "." and K/M/G suffix letters, so malformed sizes are rejected instead of being silently rewritten by the sanitizer. -->
<validator type="regex">[0-9KMGkmg.]+$</validator>
</param>
<param name="seed_depth" type="integer" min="0" value="45" label="Seed depth" help="Expected seed depth, used to calculate seed_cutoff, co-use with
genome_size, you can try to set it 30-45 to get a better assembly result." />
<param name="seed_depth" type="integer" min="0" value="45" label="Seed depth" help="Expected seed depth, used to calculate seed_cutoff, co-use with genome_size, you can try to set it 30-45 to get a better assembly result."/>
</when>
<when value="seed">
<param name="seed_cutoff" type="integer" min="0" value="0" optional="true" label="Seed cutoff" help="Minimum seed length. Set it to 0 for calculating it automatically." />
<param name="seed_cutoff" type="integer" min="0" value="0" optional="true" label="Seed cutoff" help="Minimum seed length. Set it to 0 for calculating it automatically."/>
</when>
</conditional>
<param name="blocksize" type="text" value="10g" label="Block size" help="Block size for parallel running, split non-seed reads into small files, the maximum size of
each file is blocksize.">
<param name="blocksize" type="text" value="10g" label="Block size" help="Block size for parallel running, split non-seed reads into small files, the maximum size of each file is blocksize.">
<!-- Drop every character that is not a letter or a digit from the submitted value. -->
<sanitizer invalid_char="">
<valid initial="string.letters,string.digits"/>
</sanitizer>
<!-- Galaxy regex validators only match at the start of the value; the trailing $ forces the whole value to consist of digits and K/M/G suffix letters, so malformed sizes are rejected instead of being silently rewritten by the sanitizer. -->
<validator type="regex">[0-9KMGkmg]+$</validator>
</param>
<section name="ovl_parameters" title="OVL sort parameters" expanded="true">
<param name="max_depth_overlap" type="integer" min="0" value="40" label="Max depth of each overlap" help="This value should be equal or smaller than
the average sequencing depth." />
<param name="max_depth_overlap" type="integer" min="0" value="40" label="Max depth of each overlap" help="This value should be equal or smaller than the average sequencing depth."/>
<param name="max_over_hang_length" type="integer" min="0" value="300" label="Max over hang length to filter"/>
</section>
<section name="minimap_parameters" title="Minimap2 parameters" expanded="true">
<param name="minlen" type="integer" min="0" value="500" label="Minimum overlap length"/>
<param name="minmatch" type="integer" min="0" value="100" label="Minimum match length"/>
<param name="minide" type="float" min="0" value="0.05" max="1" label="Minimum identity"/>
<param name="kn" type="integer" min="0" value="17" max="28" label="K-mer size"/>
<param name="minide" type="float" min="0" max="1" value="0.05" label="Minimum identity"/>
<param name="kn" type="integer" min="0" max="28" value="17" label="K-mer size"/>
<param name="wn" type="integer" min="0" value="10" label="Minimizer window size"/>
<param name="cn" type="integer" min="0" value="20" label="Re-align for every n reads"/>
<param name="maxhan1" type="integer" min="0" value="5000" label="Maximum over hang length for re-align"/>
<param name="maxhan2" type="integer" min="0" value="500" label="Maximum over hang length for filtering contained reads"/>
</section>
<section name="correction_options" title="Correction options" expanded="true">
<param name="split" type="boolean" truevalue="--split" falsevalue="" checked="false" label="Split" help="Split the corrected seed with un-corrected regions" />
<param name="fast" type="boolean" truevalue="-fast" falsevalue="" checked="false" label="Fast" help="0.5-1 times faster mode with a little lower accuracy." />
<param name="split" type="boolean" truevalue="--split" falsevalue="" checked="false" label="Split" help="Split the corrected seed with un-corrected regions"/>
<param name="fast" type="boolean" truevalue="-fast" falsevalue="" checked="false" label="Fast" help="0.5-1 times faster mode with a little lower accuracy."/>
</section>
<section name="np" title="NextGraph parameters">
<param argument="-c" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Disable pre-filter chimeric reads."/>
<param argument="-G" type="boolean" truevalue="-G" falsevalue="" checked="false" label="Retain potential chimeric edges."/>
<param argument="-k" type="boolean" truevalue="-k" falsevalue="" checked="false" label="Delete complex bubble paths."/>
<param argument="-A" type="boolean" truevalue="-A" falsevalue="" checked="false" label="Output alternative contigs"
help="For highly heterozygous genomes, it will increase assembly size."/>
<param argument="-A" type="boolean" truevalue="-A" falsevalue="" checked="false" label="Output alternative contigs" help="For highly heterozygous genomes, it will increase assembly size."/>
<param argument="-a" type="select" label="Output format">
<option value="1">FASTA</option>
<option value="3">GFA</option>
</param>
<param argument="-E" type="integer" min="0" value="1000" label="Minimum contig length"/>
<param argument="-q" type="integer" min="0" value="0" label="Minimum short branch length" help="By default it is disabled (value = 0)." />
<param argument="-q" type="integer" min="0" value="0" label="Minimum short branch length" help="By default it is disabled (value = 0)."/>
<!-- NextGraph -i option: minimum alignment identity, bounded to 0..1 by min/max. -->
<param argument="-i" type="float" min="0" max="1" value="0.1" label="Minimum identity of alignments"/>
<param argument="-I" type="float" min="0" max="1" value="0.7" label="Minimum test-to-best identity ratio"/>
<param argument="-R" type="float" min="0" max="1" value="0" label="Maximum test-to-best identity ratio"/>
Expand Down Expand Up @@ -152,10 +147,10 @@
<data name="stats" format="txt" from_work_dir="03.ctg_graph/nd.asm.fasta.stat" label="${tool.name} on ${on_string}: stats">
<filter>task != 'correct'</filter>
</data>
<data name="asmp" format="txt" from_work_dir="03.ctg_graph/nd.asm.p.fasta" label="${tool.name} on ${on_string}: nd.asm.p.fasta">
<data name="asmp" format="fasta" from_work_dir="03.ctg_graph/nd.asm.p.fasta" label="${tool.name} on ${on_string}: nd.asm.p.fasta">
<filter>task != 'correct'</filter>
</data>
<data name="asm" format="txt" from_work_dir="03.ctg_graph/nd.asm.fasta.stat" label="${tool.name} on ${on_string}: nd.asm.p.fasta">
<data name="asm_fasta_stat" format="txt" from_work_dir="03.ctg_graph/nd.asm.fasta.stat" label="${tool.name} on ${on_string}: nd.asm.fasta.stat">
<filter>task != 'correct'</filter>
</data>
<data name="asm" format="txt" from_work_dir="02.cns_align/01.seed_cns.sh.work/seed_cns3" label="${tool.name} on ${on_string}: corrected">
Expand Down Expand Up @@ -193,7 +188,7 @@
<param name="seed_depth" value="45"/>
</conditional>
<assert_stderr>
<has_text text="The read/seed length is too short, and the assembly result is unexpected and please check the assembly quality carefully." />
<has_text text="The read/seed length is too short, and the assembly result is unexpected and please check the assembly quality carefully."/>
</assert_stderr>
</test>
<test expect_failure="true">
Expand All @@ -207,7 +202,7 @@
<param name="seed_depth" value="45"/>
</conditional>
<assert_stderr>
<has_text text="The read/seed length is too short, and the assembly result is unexpected and please check the assembly quality carefully." />
<has_text text="The read/seed length is too short, and the assembly result is unexpected and please check the assembly quality carefully."/>
</assert_stderr>
</test>
</tests>
Expand All @@ -216,5 +211,5 @@
a "correct-then-assemble" strategy similar to canu (no correction step for PacBio HiFi reads), but
requires significantly fewer computing resources and less storage.
]]></help>
<expand macro="citations" />
</tool>
<expand macro="citations"/>
</tool>
Loading