Merge pull request #1163 from zalandoresearch/GH-1156-transformers
GH-1156: PyTorch-Transformers -> Transformers
yosipk authored Sep 27, 2019
2 parents 5431e9d + 3b6e8f6 commit 3013f66
Showing 4 changed files with 13 additions and 13 deletions.
10 changes: 5 additions & 5 deletions flair/embeddings.py
@@ -15,7 +15,7 @@
from deprecated import deprecated
from torch.nn import ParameterList, Parameter

-from pytorch_transformers import (
+from transformers import (
BertTokenizer,
BertModel,
RobertaTokenizer,
@@ -997,7 +997,7 @@ def _build_token_subwords_mapping(
Token index (key) and number of corresponding subwords (value) for a sentence.
:param sentence: input sentence
-    :param tokenizer: PyTorch-Transformers tokenization object
+    :param tokenizer: Transformers tokenization object
:return: dictionary of token index to corresponding number of subwords
"""
token_subwords_mapping: Dict[int, int] = {}
@@ -1019,7 +1019,7 @@ def _build_token_subwords_mapping_gpt2(
Token index (key) and number of corresponding subwords (value) for a sentence.
:param sentence: input sentence
-    :param tokenizer: PyTorch-Transformers tokenization object
+    :param tokenizer: Transformers tokenization object
:return: dictionary of token index to corresponding number of subwords
"""
token_subwords_mapping: Dict[int, int] = {}
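
As an aside for readers following the docstrings above, the sketch below is a hypothetical illustration (not Flair's actual implementation) of how such a token-to-subword-count mapping can be built with a Transformers tokenizer; the helper name and example tokens are invented for the illustration.

```python
from typing import Dict, List

from transformers import BertTokenizer


def build_token_subwords_mapping(tokens: List[str], tokenizer) -> Dict[int, int]:
    """Map each token index (1-based) to the number of subwords it splits into."""
    mapping: Dict[int, int] = {}
    for idx, token in enumerate(tokens, start=1):
        # tokenize() returns the subword pieces for this token (e.g. WordPiece
        # pieces for BERT); the count depends on the tokenizer's vocabulary.
        mapping[idx] = len(tokenizer.tokenize(token))
    return mapping


tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
print(build_token_subwords_mapping(["The", "grass", "is", "green", "."], tokenizer))
```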
@@ -1962,11 +1962,11 @@ def __init__(

if bert_model_or_path.startswith("distilbert"):
try:
-                from pytorch_transformers import DistilBertTokenizer, DistilBertModel
+                from transformers import DistilBertTokenizer, DistilBertModel
except ImportError:
log.warning("-" * 100)
log.warning(
"ATTENTION! To use DistilBert, please first install a recent version of pytorch-transformers!"
"ATTENTION! To use DistilBert, please first install a recent version of transformers!"
)
log.warning("-" * 100)
pass
2 changes: 1 addition & 1 deletion requirements.txt
@@ -9,7 +9,7 @@ sklearn
sqlitedict>=1.6.0
deprecated>=1.2.4
hyperopt>=0.1.1
-pytorch-transformers>=1.1.0
+transformers>=2.0.0
bpemb>=0.2.9
regex
tabulate
12 changes: 6 additions & 6 deletions resources/docs/embeddings/TRANSFORMER_EMBEDDINGS.md
@@ -1,6 +1,6 @@
-# PyTorch-Transformers
+# Transformers

-Thanks to the brilliant [`pytorch-transformers`](https://github.com/huggingface/pytorch-transformers) library from [Hugging Face](https://github.com/huggingface),
+Thanks to the brilliant [`transformers`](https://github.com/huggingface/transformers) library from [Hugging Face](https://github.com/huggingface),
Flair is able to support various Transformer-based architectures like BERT or XLNet.

The following embeddings can be used in Flair:
@@ -14,7 +14,7 @@ The following embeddings can be used in Flair:
* `RoBERTaEmbeddings`

This section shows how to use these Transformer-based architectures in Flair and is heavily based on the excellent
-[PyTorch-Transformers pre-trained models documentation](https://huggingface.co/pytorch-transformers/pretrained_models.html).
+[Transformers pre-trained models documentation](https://huggingface.co/transformers/pretrained_models.html).

## BERT Embeddings

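For orientation, here is a minimal usage sketch (not taken from this commit) of Flair's `BertEmbeddings` class; it assumes the renamed `transformers` package is installed and uses `bert-base-uncased` purely as an example model name.

```python
from flair.data import Sentence
from flair.embeddings import BertEmbeddings

# initialize embeddings from a pre-trained BERT model
embedding = BertEmbeddings("bert-base-uncased")

# create an example sentence and embed it; afterwards every token
# carries a BERT-based embedding vector
sentence = Sentence("The grass is green .")
embedding.embed(sentence)

for token in sentence:
    print(token, token.embedding.shape)
```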
@@ -76,13 +76,13 @@ You can load any of the pre-trained BERT models by providing `bert_model_or_path`
| | (see [details](https://github.com/google-research/bert/#bert))
| `bert-large-uncased-whole-word-masking-finetuned-squad` | 24-layer, 1024-hidden, 16-heads, 340M parameters
| | The `bert-large-uncased-whole-word-masking` model fine-tuned on SQuAD (see details of fine-tuning in the
-| | [example section of PyTorch-Transformers](https://github.com/huggingface/pytorch-transformers/tree/master/examples))
+| | [example section of Transformers](https://github.com/huggingface/transformers/tree/master/examples))
| `bert-large-cased-whole-word-masking-finetuned-squad` | 24-layer, 1024-hidden, 16-heads, 340M parameters
| | The `bert-large-cased-whole-word-masking` model fine-tuned on SQuAD
-| | (see [details of fine-tuning in the example section](https://huggingface.co/pytorch-transformers/examples.html))
+| | (see [details of fine-tuning in the example section](https://huggingface.co/transformers/examples.html))
| `bert-base-cased-finetuned-mrpc` | 12-layer, 768-hidden, 12-heads, 110M parameters
| | The `bert-base-cased` model fine-tuned on MRPC
-| | (see [details of fine-tuning in the example section of PyTorch-Transformers](https://huggingface.co/pytorch-transformers/examples.html))
+| | (see [details of fine-tuning in the example section of Transformers](https://huggingface.co/transformers/examples.html))

It is also possible to use [distilled versions](https://medium.com/huggingface/distilbert-8cf3380435b5)
of BERT (DistilBERT):
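
A minimal sketch of that usage, assuming (as the change to `flair/embeddings.py` above suggests) that a distilled model is selected simply by passing a model name starting with `distilbert` to the same class; the model name is illustrative:

```python
from flair.data import Sentence
from flair.embeddings import BertEmbeddings

# a name starting with "distilbert" routes to DistilBertTokenizer/DistilBertModel
embedding = BertEmbeddings("distilbert-base-uncased")

sentence = Sentence("The grass is green .")
embedding.embed(sentence)
```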
2 changes: 1 addition & 1 deletion tests/test_transformer_embeddings.py
@@ -12,7 +12,7 @@
XLMEmbeddings,
)

-from pytorch_transformers import (
+from transformers import (
RobertaModel,
RobertaTokenizer,
OpenAIGPTModel,
