From b46513622a180f1367c9b73baa26d84bcdccad59 Mon Sep 17 00:00:00 2001 From: Andrew Tritt Date: Mon, 4 Mar 2024 16:07:31 -0800 Subject: [PATCH] Add JOSS paper and citations (#45) * Add JOSS paper and citations --- CITATION.bib | 7 ++++++ paper/paper.bib | 21 +++++++++++++++++ paper/paper.md | 60 +++++++++++++++++++++++++++++++++++++++++++++++ src/gtnet/main.py | 2 +- 4 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 CITATION.bib create mode 100644 paper/paper.bib create mode 100644 paper/paper.md diff --git a/CITATION.bib b/CITATION.bib new file mode 100644 index 0000000..4807984 --- /dev/null +++ b/CITATION.bib @@ -0,0 +1,7 @@ +@software{gtnet, + author = {Tritt, Andrew}, + title = {{The Genome Taxonomy Network}}, + url = {https://github.com/exabiome/gtnet}, + version = {0.0.6}, + year = {2024} +} diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 0000000..34460e7 --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,21 @@ +@article{GTDB, + doi = {10.1093/nar/gkab776}, + url = {https://doi.org/10.1093%2Fnar%2Fgkab776}, + year = 2021, + month = {sep}, + publisher = {Oxford University Press ({OUP})}, + volume = {50}, + number = {D1}, + pages = {D785--D794}, + author = {Donovan H Parks and Maria Chuvochina and Christian Rinke and Aaron J Mussig and Pierre-Alain Chaumeil and Philip Hugenholtz}, + title = {{GTDB}: an ongoing census of bacterial and archaeal diversity through a phylogenetically consistent, rank normalized~and complete genome-based taxonomy}, + journal = {Nucleic Acids Research} +} + +@software{gtnet, + author = {Tritt, Andrew}, + title = {{The Genome Taxonomy Network}}, + url = {https://github.com/exabiome/gtnet}, + version = {0.0.6}, + year = {2024} +} diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 0000000..89c5d8c --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,60 @@ +--- +title: 'gtnet: A Python package for taxonomic labelling with the Genome Taxonomy Network' +tags: + - Python + - metagenomics 
+ - deep learning +authors: + - name: Andrew J. Tritt + orcid: 0000-0002-1617-449X + equal-contrib: false + affiliation: "1, 2" # (Multiple affiliations must be quoted) + - name: Kristofer Bouchard + orcid: 0000-0002-1974-4603 + equal-contrib: true # (This is how you can denote equal contributions between multiple authors) + affiliation: 2 + - name: Author with no affiliation + corresponding: true # (This is how to denote the corresponding author) + affiliation: 3 + - given-names: Ludwig + dropping-particle: van + surname: Beethoven + affiliation: 3 +affiliations: + - name: Applied Math and Computational Research Division, Lawrence Berkeley National Laboratory, Berkeley, CA, USA + index: 1 + - name: Scientific Data Division, Lawrence Berkeley National Laboratory, Berkeley, CA, USA. + index: 2 + - name: Biological Systems and Engineering Division, Lawrence Berkeley National Laboratory, Berkeley, CA, USA + index: 3 + - name: Helen Wills Neuroscience Institute and Redwood Center for Theoretical Neuroscience, University of California Berkeley, Berkeley, CA, USA. + index: 4 +date: 13 August 2017 +bibliography: paper.bib +--- + +# Summary + +The field of metagenomics seeks to understand the genomic and functional diversity of microbial +communities. Modern metagenomic sequencing pipelines produce unlabelled genomic sequences at an +unprecedented rate. Processing of these sequences, i.e. contigs, involves labelling the taxonomy of these +contigs. In recent years, the metagenomics field has been coalescing around the use of the Genome +Taxonomy Database [@GTDB], a phylogenetically informed taxonomy for consistently labelling microbial taxa. +The Genome Taxonomy Network, `GTNet`, is a neural network capable of classifying metagenomic +contigs with taxonomic labels from the Genome Taxonomy Database. + +# Statement of need + +`gtnet` [@gtnet] is a Python package and command-line utility built on top of `GTNet`. 
The purpose of this software +is to make the predictive capabilities of GTNet easily accessible to the metagenomics community. + +In addition to deploying GTNet, the `gtnet` software seeks to address other outstanding issues in the +field. Many taxonomic classification tools are still released as source code in tarball formats, require +installation of third-party software that may no longer be maintained, or use application-specific output formats. +These issues make existing tools cumbersome and difficult to use. By leveraging the existing Python ecosystem, we +seek to make a tool that is easier to use and version for the sake of user-friendliness and reproducibility. + +By releasing easily-installable and user-friendly software capable of generating GTDB taxonomies, we +hope to lower the technical barrier to wide adoption of standardized taxonomy across the metagenomics field. + +# References diff --git a/src/gtnet/main.py b/src/gtnet/main.py index e9629ab..eefd5fa 100644 --- a/src/gtnet/main.py +++ b/src/gtnet/main.py @@ -32,7 +32,7 @@ def print_help(): for c, f in command_dict.items(): nspaces = 16 - len(c) print(f' {c}' + ' '*nspaces + f.doc, file=sio) - print(' help print this usage statememt\n', file=sio) + print(' help print this usage statement\n', file=sio) sys.stdout.write(sio.getvalue())