From 0d283ce1396a22eae18a6f845724eb630d49915c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Konstantin=20Sch=C3=BCtze?= Date: Fri, 12 Mar 2021 14:44:16 +0100 Subject: [PATCH] Release v0.1.7 --- CHANGELOG.md | 7 ++++++- bio_embeddings/embed/prottrans_embedder.py | 4 ++-- pyproject.toml | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b695eeed..972e198f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,14 @@ # Changelog -## Unreleased +## v0.1.7 * Added `prottrans_t5_uniref50`/`ProtTransT5UniRef50Embedder`. This version improves over T5 BFD by being finetuned on UniRef50. + * Added a `half_model` option to both T5 models (`prottrans_t5_uniref50` and `prottrans_t5_bfd`). On the tested GPU (Quadro RTX 3000) `half_model: True` reduces memory consumption + from 12GB to 7GB while the effect in benchmarks is negligible (±0.1 percentage points in different sets, + generally below standard error). We therefore recommend switching to `half_model: True` for T5. 
* Added [DeepBLAST](https://github.com/flatironinstitute/deepblast) from [Protein Structural Alignments From Sequence](https://www.biorxiv.org/content/10.1101/2020.11.03.365932v1) (see example/deepblast for an example) + * Dropped Python 3.6 support and added Python 3.9 support + * Updated the Docker example to cache weights ## v0.1.6 diff --git a/bio_embeddings/embed/prottrans_embedder.py b/bio_embeddings/embed/prottrans_embedder.py index 08e2d605..ab3cb6ca 100644 --- a/bio_embeddings/embed/prottrans_embedder.py +++ b/bio_embeddings/embed/prottrans_embedder.py @@ -126,7 +126,7 @@ class ProtTransT5BFDEmbedder(ProtTransT5Embedder): """Encoder of the ProtTrans T5 model trained on BFD We recommend settings `half_model=True`, which on the tested GPU (Quadro RTX 3000) reduces memory consumption - from 12GB to 7GB while the effect in benchmark is negligible (±0.1 percentages points in different sets, + from 12GB to 7GB while the effect in benchmarks is negligible (±0.1 percentage points in different sets, generally below standard error) """ @@ -137,7 +137,7 @@ class ProtTransT5UniRef50Embedder(ProtTransT5Embedder): """Encoder of the ProtTrans T5 model trained on BFD and finetuned on UniRef 50 We recommend settings `half_model=True`, which on the tested GPU (Quadro RTX 3000) reduces memory consumption - from 12GB to 7GB while the effect in benchmark is negligible (±0.1 percentages points in different sets, + from 12GB to 7GB while the effect in benchmarks is negligible (±0.1 percentage points in different sets, generally below standard error) """ diff --git a/pyproject.toml b/pyproject.toml index 2a44d802..c9ea15d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bio_embeddings" -version = "0.1.6" +version = "0.1.7" description = "A pipeline for protein embedding generation and visualization" authors = ["Christian Dallago ", "Michael Heinzinger ", "Tobias Olenyi ", "Konstantin Schuetze "] maintainers = ["Rostlab "]