Tokenizers should be framework agnostic #8599

Merged
merged 5 commits on Nov 17, 2020
Changes from 3 commits
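In practice, the diff below makes `prepare_seq2seq_batch` framework agnostic: the tokenizers no longer default to PyTorch (or NumPy) tensors, so callers choose the return type explicitly. A minimal sketch of the intended call pattern after this change (the checkpoint and input text are illustrative):

```python
from transformers import MarianTokenizer

tok = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
src_texts = ["I am a small frog."]

# Default (return_tensors=None): plain Python lists, usable from any framework.
batch_lists = tok.prepare_seq2seq_batch(src_texts)

# Framework-specific tensors are now always requested explicitly.
batch_pt = tok.prepare_seq2seq_batch(src_texts, return_tensors="pt")  # torch.Tensor values
batch_tf = tok.prepare_seq2seq_batch(src_texts, return_tensors="tf")  # tf.constant values (needs TensorFlow)
```

The documentation and example updates below simply add `return_tensors="pt"` wherever the batch is fed straight into a PyTorch model.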
8 changes: 4 additions & 4 deletions .github/workflows/self-push.yml
@@ -16,7 +16,7 @@ on:

jobs:
run_tests_torch_gpu:
runs-on: [self-hosted, single-gpu]
runs-on: [self-hosted, gpu, single-gpu]
steps:
- uses: actions/checkout@v2
- name: Python version
@@ -86,7 +86,7 @@ jobs:


run_tests_tf_gpu:
runs-on: [self-hosted, single-gpu]
runs-on: [self-hosted, gpu, single-gpu]
steps:
- uses: actions/checkout@v2
- name: Python version
@@ -154,7 +154,7 @@ jobs:
path: reports

run_tests_torch_multi_gpu:
runs-on: [self-hosted, multi-gpu]
runs-on: [self-hosted, gpu, multi-gpu]
steps:
- uses: actions/checkout@v2
- name: Python version
@@ -213,7 +213,7 @@ jobs:
path: reports

run_tests_tf_multi_gpu:
runs-on: [self-hosted, multi-gpu]
runs-on: [self-hosted, gpu, multi-gpu]
steps:
- uses: actions/checkout@v2
- name: Python version
9 changes: 5 additions & 4 deletions .github/workflows/self-scheduled.yml
@@ -9,13 +9,14 @@ on:
push:
branches:
- ci_*
- framework-agnostic-tokenizers
repository_dispatch:
schedule:
- cron: "0 0 * * *"

jobs:
run_all_tests_torch_gpu:
runs-on: [self-hosted, single-gpu]
runs-on: [self-hosted, gpu, single-gpu]
steps:
- uses: actions/checkout@v2

@@ -109,7 +110,7 @@ jobs:


run_all_tests_tf_gpu:
runs-on: [self-hosted, single-gpu]
runs-on: [self-hosted, gpu, single-gpu]
steps:
- uses: actions/checkout@v2

@@ -188,7 +189,7 @@ jobs:
path: reports

run_all_tests_torch_multi_gpu:
runs-on: [self-hosted, multi-gpu]
runs-on: [self-hosted, gpu, multi-gpu]
steps:
- uses: actions/checkout@v2

@@ -279,7 +280,7 @@ jobs:
path: reports

run_all_tests_tf_multi_gpu:
runs-on: [self-hosted, multi-gpu]
runs-on: [self-hosted, gpu, multi-gpu]
steps:
- uses: actions/checkout@v2

4 changes: 2 additions & 2 deletions docs/source/model_doc/marian.rst
@@ -78,7 +78,7 @@ require 3 character language codes:
tokenizer = MarianTokenizer.from_pretrained(model_name)
print(tokenizer.supported_language_codes)
model = MarianMTModel.from_pretrained(model_name)
translated = model.generate(**tokenizer.prepare_seq2seq_batch(src_text))
translated = model.generate(**tokenizer.prepare_seq2seq_batch(src_text, return_tensors="pt"))
tgt_text = [tokenizer.decode(t, skip_special_tokens=True) for t in translated]
# ["c'est une phrase en anglais que nous voulons traduire en français",
# 'Isto deve ir para o português.',
@@ -150,7 +150,7 @@ Example of translating english to many romance languages, using old-style 2 char
print(tokenizer.supported_language_codes)

model = MarianMTModel.from_pretrained(model_name)
translated = model.generate(**tokenizer.prepare_seq2seq_batch(src_text))
translated = model.generate(**tokenizer.prepare_seq2seq_batch(src_text, return_tensors="pt"))
tgt_text = [tokenizer.decode(t, skip_special_tokens=True) for t in translated]
# ["c'est une phrase en anglais que nous voulons traduire en français", 'Isto deve ir para o português.', 'Y esto al español']

4 changes: 2 additions & 2 deletions docs/source/model_doc/mbart.rst
@@ -44,7 +44,7 @@ the sequences for sequence-to-sequence fine-tuning.

example_english_phrase = "UN Chief Says There Is No Military Solution in Syria"
expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
batch = tokenizer.prepare_seq2seq_batch(example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian)
batch = tokenizer.prepare_seq2seq_batch(example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian, return_tensors="pt")
model(input_ids=batch['input_ids'], labels=batch['labels']) # forward pass

- Generation
@@ -58,7 +58,7 @@ the sequences for sequence-to-sequence fine-tuning.
model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro")
tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro")
article = "UN Chief Says There Is No Military Solution in Syria"
batch = tokenizer.prepare_seq2seq_batch(src_texts=[article], src_lang="en_XX")
batch = tokenizer.prepare_seq2seq_batch(src_texts=[article], src_lang="en_XX", return_tensors="pt")
translated_tokens = model.generate(**batch, decoder_start_token_id=tokenizer.lang_code_to_id["ro_RO"])
translation = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
assert translation == "Şeful ONU declară că nu există o soluţie militară în Siria"
2 changes: 1 addition & 1 deletion docs/source/model_doc/pegasus.rst
@@ -78,7 +78,7 @@ Usage Example
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
batch = tokenizer.prepare_seq2seq_batch(src_text, truncation=True, padding='longest').to(torch_device)
batch = tokenizer.prepare_seq2seq_batch(src_text, truncation=True, padding='longest', return_tensors="pt").to(torch_device)
translated = model.generate(**batch)
tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
assert tgt_text[0] == "California's largest electricity provider has turned off power to hundreds of thousands of customers."
2 changes: 1 addition & 1 deletion model_cards/tuner007/pegasus_paraphrase/README.md
@@ -11,7 +11,7 @@ tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)

def get_response(input_text,num_return_sequences):
batch = tokenizer.prepare_seq2seq_batch([input_text],truncation=True,padding='longest',max_length=60).to(torch_device)
batch = tokenizer.prepare_seq2seq_batch([input_text],truncation=True,padding='longest',max_length=60, return_tensors="pt").to(torch_device)
translated = model.generate(**batch,max_length=60,num_beams=10, num_return_sequences=num_return_sequences, temperature=1.5)
tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
return tgt_text
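For illustration only (this call is not part of the model card diff), the `get_response` helper above would be used roughly like this, with the input sentence and the number of returned sequences chosen by the caller:

```python
# Hypothetical usage of the get_response helper defined above.
paraphrases = get_response("The ultimate test of your knowledge is your capacity to convey it to another.", 5)
print(paraphrases)  # list of 5 candidate paraphrases
```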
2 changes: 1 addition & 1 deletion model_cards/tuner007/pegasus_qa/README.md
@@ -12,7 +12,7 @@ model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_dev

def get_answer(question, context):
input_text = "question: %s text: %s" % (question,context)
batch = tokenizer.prepare_seq2seq_batch([input_text], truncation=True, padding='longest').to(torch_device)
batch = tokenizer.prepare_seq2seq_batch([input_text], truncation=True, padding='longest', return_tensors="pt").to(torch_device)
translated = model.generate(**batch)
tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
return tgt_text[0]
2 changes: 1 addition & 1 deletion scripts/fsmt/fsmt-make-super-tiny-model.py
@@ -58,7 +58,7 @@
print(f"num of params {tiny_model.num_parameters()}")

# Test
batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"], return_tensors="pt")
outputs = tiny_model(**batch)

print("test output:", len(outputs.logits[0]))
2 changes: 1 addition & 1 deletion scripts/fsmt/fsmt-make-tiny-model.py
@@ -29,7 +29,7 @@
print(f"num of params {tiny_model.num_parameters()}")

# Test
batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"], return_tensors="pt")
outputs = tiny_model(**batch)

print("test output:", len(outputs.logits[0]))
4 changes: 2 additions & 2 deletions src/transformers/models/bart/tokenization_bart.py
@@ -61,7 +61,7 @@ def prepare_seq2seq_batch(
max_length: Optional[int] = None,
max_target_length: Optional[int] = None,
padding: str = "longest",
return_tensors: str = "None",
return_tensors: str = None,
truncation=True,
**kwargs,
) -> BatchEncoding:
@@ -91,7 +91,7 @@ def prepare_seq2seq_batch(
maximum acceptable input length for the model if that argument is not provided.
* :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
different lengths).
return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`, defaults to "pt"):
return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`):
If set, will return tensors instead of list of python integers. Acceptable values are:

* :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
4 changes: 2 additions & 2 deletions src/transformers/models/bart/tokenization_bart_fast.py
@@ -56,7 +56,7 @@ def prepare_seq2seq_batch(
max_length: Optional[int] = None,
max_target_length: Optional[int] = None,
padding: str = "longest",
return_tensors: str = "None",
return_tensors: str = None,
truncation=True,
**kwargs,
) -> BatchEncoding:
@@ -86,7 +86,7 @@ def prepare_seq2seq_batch(
maximum acceptable input length for the model if that argument is not provided.
* :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
different lengths).
return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`, defaults to "pt"):
return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`):
If set, will return tensors instead of list of python integers. Acceptable values are:

* :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
2 changes: 1 addition & 1 deletion src/transformers/models/fsmt/tokenization_fsmt.py
@@ -491,7 +491,7 @@ def prepare_seq2seq_batch(
tgt_texts: Optional[List[str]] = None,
max_length: Optional[int] = None,
max_target_length: Optional[int] = None,
return_tensors: str = "pt",
return_tensors: str = None,
truncation=True,
padding="longest",
**unused,
2 changes: 1 addition & 1 deletion src/transformers/models/marian/modeling_marian.py
@@ -41,7 +41,7 @@ class MarianMTModel(BartForConditionalGeneration):

>>> model = MarianMTModel.from_pretrained(mname)
>>> tok = MarianTokenizer.from_pretrained(mname)
>>> batch = tok.prepare_seq2seq_batch(src_texts=[sample_text]) # don't need tgt_text for inference
>>> batch = tok.prepare_seq2seq_batch(src_texts=[sample_text], return_tensors="pt") # don't need tgt_text for inference
>>> gen = model.generate(**batch) # for forward pass: model(**batch)
>>> words: List[str] = tok.batch_decode(gen, skip_special_tokens=True) # returns "Where is the bus stop ?"

4 changes: 2 additions & 2 deletions src/transformers/models/marian/tokenization_marian.py
@@ -70,7 +70,7 @@ class MarianTokenizer(PreTrainedTokenizer):
>>> tok = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-de')
>>> src_texts = [ "I am a small frog.", "Tom asked his teacher for advice."]
>>> tgt_texts = ["Ich bin ein kleiner Frosch.", "Tom bat seinen Lehrer um Rat."] # optional
>>> batch_enc: BatchEncoding = tok.prepare_seq2seq_batch(src_texts, tgt_texts=tgt_texts)
>>> batch_enc: BatchEncoding = tok.prepare_seq2seq_batch(src_texts, tgt_texts=tgt_texts, return_tensors="pt")
>>> # keys [input_ids, attention_mask, labels].
>>> # model(**batch) should work
"""
@@ -175,7 +175,7 @@ def prepare_seq2seq_batch(
tgt_texts: Optional[List[str]] = None,
max_length: Optional[int] = None,
max_target_length: Optional[int] = None,
return_tensors: str = "pt",
return_tensors: str = None,
truncation=True,
padding="longest",
**unused,
2 changes: 1 addition & 1 deletion src/transformers/models/mbart/modeling_mbart.py
@@ -22,7 +22,7 @@ class MBartForConditionalGeneration(BartForConditionalGeneration):
>>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro")
>>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro")
>>> article = "UN Chief Says There Is No Military Solution in Syria"
>>> batch = tokenizer.prepare_seq2seq_batch(src_texts=[article])
>>> batch = tokenizer.prepare_seq2seq_batch(src_texts=[article], return_tensors="pt")
>>> translated_tokens = model.generate(**batch)
>>> translation = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
>>> assert translation == "Şeful ONU declară că nu există o soluţie militară în Siria"
4 changes: 2 additions & 2 deletions src/transformers/models/mbart/tokenization_mbart.py
@@ -81,7 +81,7 @@ class MBartTokenizer(XLMRobertaTokenizer):
>>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria"
>>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
>>> batch: dict = tokenizer.prepare_seq2seq_batch(
... example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian
... example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian, return_tensors="pt"
... )

"""
@@ -183,7 +183,7 @@ def prepare_seq2seq_batch(
max_target_length: Optional[int] = None,
truncation: bool = True,
padding: str = "longest",
return_tensors: str = "pt",
return_tensors: str = None,
add_prefix_space: bool = False, # ignored
**kwargs,
) -> BatchEncoding:
4 changes: 2 additions & 2 deletions src/transformers/models/mbart/tokenization_mbart_fast.py
@@ -89,7 +89,7 @@ class MBartTokenizerFast(XLMRobertaTokenizerFast):
>>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria"
>>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
>>> batch: dict = tokenizer.prepare_seq2seq_batch(
... example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian
... example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian, return_tensors="pt"
... )
"""

@@ -181,7 +181,7 @@ def prepare_seq2seq_batch(
max_target_length: Optional[int] = None,
truncation: bool = True,
padding: str = "longest",
return_tensors: str = "pt",
return_tensors: str = None,
**kwargs,
) -> BatchEncoding:
if max_length is None:
2 changes: 1 addition & 1 deletion src/transformers/models/pegasus/modeling_pegasus.py
@@ -38,7 +38,7 @@ class PegasusForConditionalGeneration(BartForConditionalGeneration):

>>> model = PegasusForConditionalGeneration.from_pretrained(mname)
>>> tok = PegasusTokenizer.from_pretrained(mname)
>>> batch = tok.prepare_seq2seq_batch(src_texts=[PGE_ARTICLE]) # don't need tgt_text for inference
>>> batch = tok.prepare_seq2seq_batch(src_texts=[PGE_ARTICLE], return_tensors="pt") # don't need tgt_text for inference
>>> gen = model.generate(**batch) # for forward pass: model(**batch)
>>> summary: List[str] = tok.batch_decode(gen, skip_special_tokens=True)
>>> assert summary == "California's largest electricity provider has turned off power to tens of thousands of customers."
2 changes: 1 addition & 1 deletion src/transformers/models/pegasus/tokenization_pegasus.py
@@ -134,7 +134,7 @@ def prepare_seq2seq_batch(
tgt_texts: Optional[List[str]] = None,
max_length: Optional[int] = None,
max_target_length: Optional[int] = None,
return_tensors: str = "pt",
return_tensors: str = None,
truncation=True,
padding="longest",
**unused,
@@ -95,7 +95,7 @@ def prepare_seq2seq_batch(
tgt_texts: Optional[List[str]] = None,
max_length: Optional[int] = None,
max_target_length: Optional[int] = None,
return_tensors: str = "pt",
return_tensors: str = None,
truncation=True,
padding="longest",
**unused,
2 changes: 1 addition & 1 deletion src/transformers/models/rag/tokenization_rag.py
@@ -71,7 +71,7 @@ def prepare_seq2seq_batch(
max_length: Optional[int] = None,
max_target_length: Optional[int] = None,
padding: str = "longest",
return_tensors: str = "np",
return_tensors: str = None,
truncation=True,
**kwargs,
) -> BatchEncoding:
2 changes: 1 addition & 1 deletion src/transformers/tokenization_utils.py
@@ -797,7 +797,7 @@ def prepare_seq2seq_batch(
maximum acceptable input length for the model if that argument is not provided.
* :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
different lengths).
return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`, defaults to "pt"):
return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`):
If set, will return tensors instead of list of python integers. Acceptable values are:

* :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
2 changes: 1 addition & 1 deletion src/transformers/tokenization_utils_base.py
@@ -1455,7 +1455,7 @@ def all_special_ids(self) -> List[int]:
maximum acceptable input length for the model if that argument is not provided.
* :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
different lengths).
return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`, defaults to "pt"):
return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`):
If set, will return tensors instead of list of python integers. Acceptable values are:

* :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
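Beyond the string form, the docstring above notes that the same parameter accepts a `TensorType` value. A hedged sketch of that spelling (assuming `TensorType` is importable from the top-level `transformers` package, and using an illustrative checkpoint):

```python
from transformers import BartTokenizer, TensorType

tok = BartTokenizer.from_pretrained("facebook/bart-large")
batch = tok.prepare_seq2seq_batch(
    src_texts=["UN Chief Says There Is No Military Solution in Syria"],
    return_tensors=TensorType.PYTORCH,  # equivalent to passing the string "pt"
)
print(type(batch["input_ids"]))  # torch.Tensor
```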
20 changes: 13 additions & 7 deletions tests/test_modeling_marian.py
@@ -132,9 +132,9 @@ def _assert_generated_batch_equal_expected(self, **tokenizer_kwargs):
self.assertListEqual(self.expected_text, generated_words)

def translate_src_text(self, **tokenizer_kwargs):
model_inputs = self.tokenizer.prepare_seq2seq_batch(src_texts=self.src_text, **tokenizer_kwargs).to(
torch_device
)
model_inputs = self.tokenizer.prepare_seq2seq_batch(
src_texts=self.src_text, return_tensors="pt", **tokenizer_kwargs
).to(torch_device)
self.assertEqual(self.model.device, model_inputs.input_ids.device)
generated_ids = self.model.generate(
model_inputs.input_ids, attention_mask=model_inputs.attention_mask, num_beams=2, max_length=128
@@ -151,7 +151,9 @@ def test_forward(self):
src, tgt = ["I am a small frog"], ["Ich bin ein kleiner Frosch."]
expected_ids = [38, 121, 14, 697, 38848, 0]

model_inputs: dict = self.tokenizer.prepare_seq2seq_batch(src, tgt_texts=tgt).to(torch_device)
model_inputs: dict = self.tokenizer.prepare_seq2seq_batch(src, tgt_texts=tgt, return_tensors="pt").to(
torch_device
)

self.assertListEqual(expected_ids, model_inputs.input_ids[0].tolist())

@@ -171,12 +173,16 @@ def test_forward(self):

def test_unk_support(self):
t = self.tokenizer
ids = t.prepare_seq2seq_batch(["||"]).to(torch_device).input_ids[0].tolist()
ids = t.prepare_seq2seq_batch(["||"], return_tensors="pt").to(torch_device).input_ids[0].tolist()
expected = [t.unk_token_id, t.unk_token_id, t.eos_token_id]
self.assertEqual(expected, ids)

def test_pad_not_split(self):
input_ids_w_pad = self.tokenizer.prepare_seq2seq_batch(["I am a small frog <pad>"]).input_ids[0].tolist()
input_ids_w_pad = (
self.tokenizer.prepare_seq2seq_batch(["I am a small frog <pad>"], return_tensors="pt")
.input_ids[0]
.tolist()
)
expected_w_pad = [38, 121, 14, 697, 38848, self.tokenizer.pad_token_id, 0] # pad
self.assertListEqual(expected_w_pad, input_ids_w_pad)

@@ -294,7 +300,7 @@ def test_tokenizer_handles_empty(self):
normalized = self.tokenizer.normalize("")
self.assertIsInstance(normalized, str)
with self.assertRaises(ValueError):
self.tokenizer.prepare_seq2seq_batch([""])
self.tokenizer.prepare_seq2seq_batch([""], return_tensors="pt")

@slow
def test_pipeline(self):