Merge branch 'release_0.1.7' into 'develop'
Release v0.1.7

See merge request sacdallago/bio_embeddings!156
konstin committed Mar 17, 2021
2 parents c08b05d + 0d283ce commit 67eb41e
Showing 3 changed files with 9 additions and 4 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
```diff
@@ -1,9 +1,14 @@
 # Changelog
 
-## Unreleased
+## v0.1.7
 
+* Added `prottrans_t5_uniref50`/`ProtTransT5UniRef50Embedder`. This version improves over T5 BFD by being finetuned on UniRef50.
+* Added a `half_model` option to both T5 models (`prottrans_t5_uniref50` and `prottrans_t5_bfd`). On the tested GPU (Quadro RTX 3000) `half_model: True` reduces memory consumption
+  from 12GB to 7GB while the effect in benchmarks is negligible (±0.1 percentage points in different sets,
+  generally below standard error). We therefore recommend switching to `half_model: True` for T5.
+* Added [DeepBLAST](https://github.com/flatironinstitute/deepblast) from [Protein Structural Alignments From Sequence](https://www.biorxiv.org/content/10.1101/2020.11.03.365932v1) (see example/deepblast for an example)
+* Dropped python 3.6 support and added python 3.9 support
+* Updated the docker example to cache weights
 
 ## v0.1.6
```
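The new `half_model` option is toggled from the pipeline configuration. Below is a minimal, hypothetical sketch of such a config: only `protocol: prottrans_t5_uniref50` and `half_model: True` come from this changelog; the surrounding keys (`global`, `sequences_file`, `prefix`, the stage name `t5_embed`, `type: embed`) are illustrative assumptions about the usual bio_embeddings YAML layout.

```
# Hypothetical pipeline config sketch; only `protocol` and `half_model`
# are confirmed by the changelog above.
global:
  sequences_file: sequences.fasta  # assumed input key
  prefix: t5_run                   # assumed output prefix key
t5_embed:                          # illustrative stage name
  type: embed
  protocol: prottrans_t5_uniref50
  half_model: True                 # float16 weights: ~12GB -> ~7GB GPU memory
```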
4 changes: 2 additions & 2 deletions bio_embeddings/embed/prottrans_embedder.py
```diff
@@ -126,7 +126,7 @@ class ProtTransT5BFDEmbedder(ProtTransT5Embedder):
     """Encoder of the ProtTrans T5 model trained on BFD
     We recommend setting `half_model=True`, which on the tested GPU (Quadro RTX 3000) reduces memory consumption
-    from 12GB to 7GB while the effect in benchmark is negligible (±0.1 percentages points in different sets,
+    from 12GB to 7GB while the effect in benchmarks is negligible (±0.1 percentage points in different sets,
     generally below standard error)
     """
@@ -137,7 +137,7 @@ class ProtTransT5UniRef50Embedder(ProtTransT5Embedder):
     """Encoder of the ProtTrans T5 model trained on BFD and finetuned on UniRef 50
     We recommend setting `half_model=True`, which on the tested GPU (Quadro RTX 3000) reduces memory consumption
-    from 12GB to 7GB while the effect in benchmark is negligible (±0.1 percentages points in different sets,
+    from 12GB to 7GB while the effect in benchmarks is negligible (±0.1 percentage points in different sets,
     generally below standard error)
     """
```
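The memory saving these docstrings describe comes from keeping the T5 weights in IEEE 754 half precision. A self-contained sketch of why that roughly halves storage, using only the Python standard library (illustrative arithmetic, not bio_embeddings code):

```python
import struct

# Half precision (binary16) stores each weight in 2 bytes instead of the
# 4 bytes used by float32 - hence the roughly halved GPU memory footprint.
weight = 0.15625  # exactly representable in both formats (5/32 = 1.25 * 2^-3)
as_float32 = struct.pack("<f", weight)  # 4 bytes
as_float16 = struct.pack("<e", weight)  # 2 bytes

assert len(as_float32) == 4
assert len(as_float16) == 2
# This particular value survives the precision drop unchanged; in general,
# float16 only loses low-order mantissa bits, which is why the benchmark
# effect is negligible.
assert struct.unpack("<e", as_float16)[0] == weight
```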
2 changes: 1 addition & 1 deletion pyproject.toml
```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "bio_embeddings"
-version = "0.1.6"
+version = "0.1.7"
 description = "A pipeline for protein embedding generation and visualization"
 authors = ["Christian Dallago <[email protected]>", "Michael Heinzinger <[email protected]>", "Tobias Olenyi <[email protected]>", "Konstantin Schuetze <[email protected]>"]
 maintainers = ["Rostlab <[email protected]>"]
```
