From f8d7f457ad9d22305bc54fc1cdcd71878d643e7b Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Mon, 1 Feb 2021 13:11:06 -0500 Subject: [PATCH 1/2] Initial work --- docs/source/main_classes/tokenizer.rst | 4 ++++ src/transformers/models/dpr/modeling_dpr.py | 8 +++++--- src/transformers/models/t5/modeling_t5.py | 2 ++ src/transformers/models/t5/modeling_tf_t5.py | 4 +++- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/source/main_classes/tokenizer.rst b/docs/source/main_classes/tokenizer.rst index fd82e8f97c98..a676b6081d70 100644 --- a/docs/source/main_classes/tokenizer.rst +++ b/docs/source/main_classes/tokenizer.rst @@ -56,6 +56,8 @@ PreTrainedTokenizer :special-members: __call__ :members: + .. automethod:: encode + PreTrainedTokenizerFast ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -64,6 +66,8 @@ PreTrainedTokenizerFast :special-members: __call__ :members: + .. automethod:: encode + BatchEncoding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/transformers/models/dpr/modeling_dpr.py b/src/transformers/models/dpr/modeling_dpr.py index 0bc032baf473..5b855bed075c 100644 --- a/src/transformers/models/dpr/modeling_dpr.py +++ b/src/transformers/models/dpr/modeling_dpr.py @@ -364,9 +364,11 @@ def init_weights(self): Indices can be obtained using :class:`~transformers.DPRTokenizer`. See :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for - details. attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, - `optional`): Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, - 1]``: + details. + + `What are input IDs? <../glossary.html#input-ids>`__ attention_mask (:obj:`torch.FloatTensor` of shape + :obj:`(batch_size, sequence_length)`, `optional`): Mask to avoid performing attention on padding token + indices. Mask values selected in ``[0, 1]``: - 1 for tokens that are **not masked**, - 0 for tokens that are **masked**. diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index bd05cf00d11d..d0f5e5d1a7a4 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -1041,6 +1041,8 @@ def forward( :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for detail. + `What are input IDs? <../glossary.html#input-ids>`__ + To know more on how to prepare :obj:`input_ids` for pretraining take a look a `T5 Training <./t5.html#training>`__. attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index 9e6b16bfc10c..db58a10af469 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -929,7 +929,7 @@ def _shift_right(self, input_ids): T5_INPUTS_DOCSTRING = r""" Args: - inputs (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`): + input_ids (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`): Indices of input sequence tokens in the vocabulary. T5 is a model with relative position embeddings so you should be able to pad the inputs on the right or the left. 
@@ -937,6 +937,8 @@ def _shift_right(self, input_ids): :func:`transformers.PreTrainedTokenizer.__call__` and :func:`transformers.PreTrainedTokenizer.encode` for details. + `What are input IDs? <../glossary.html#input-ids>`__ + To know more on how to prepare :obj:`inputs` for pretraining take a look at `T5 Training <./t5.html#training>`__. decoder_input_ids (:obj:`tf.Tensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`): From f33bb4f5d9cb074808b0fe4bfabc72d222e47265 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Mon, 1 Feb 2021 16:21:54 -0500 Subject: [PATCH 2/2] Fix doc styler and other models --- src/transformers/models/dpr/modeling_dpr.py | 35 +++++++++++-------- .../models/dpr/modeling_tf_dpr.py | 12 ++++--- src/transformers/models/rag/modeling_rag.py | 2 ++ utils/style_doc.py | 21 ++++++++++- 4 files changed, 51 insertions(+), 19 deletions(-) diff --git a/src/transformers/models/dpr/modeling_dpr.py b/src/transformers/models/dpr/modeling_dpr.py index 5b855bed075c..cb98c8fa81a0 100644 --- a/src/transformers/models/dpr/modeling_dpr.py +++ b/src/transformers/models/dpr/modeling_dpr.py @@ -366,28 +366,33 @@ def init_weights(self): :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for details. - `What are input IDs? <../glossary.html#input-ids>`__ attention_mask (:obj:`torch.FloatTensor` of shape - :obj:`(batch_size, sequence_length)`, `optional`): Mask to avoid performing attention on padding token - indices. Mask values selected in ``[0, 1]``: + `What are input IDs? <../glossary.html#input-ids>`__ + attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: - 1 for tokens that are **not masked**, - 0 for tokens that are **masked**. - `What are attention masks? <../glossary.html#attention-mask>`__ token_type_ids (:obj:`torch.LongTensor` of - shape :obj:`(batch_size, sequence_length)`, `optional`): Segment token indices to indicate first and second - portions of the inputs. Indices are selected in ``[0, 1]``: + `What are attention masks? <../glossary.html#attention-mask>`__ + token_type_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, + 1]``: - 0 corresponds to a `sentence A` token, - 1 corresponds to a `sentence B` token. - `What are token type IDs? <../glossary.html#token-type-ids>`_ inputs_embeds (:obj:`torch.FloatTensor` of - shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): Optionally, instead of passing - :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want - more control over how to convert :obj:`input_ids` indices into associated vectors than the model's internal - embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the - attentions tensors of all attention layers. See ``attentions`` under returned tensors for more detail. - output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. - See ``hidden_states`` under returned tensors for more detail. return_dict (:obj:`bool`, `optional`): + `What are token type IDs? 
<../glossary.html#token-type-ids>`_ + inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): + Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. + This is useful if you want more control over how to convert :obj:`input_ids` indices into associated + vectors than the model's internal embedding lookup matrix. + output_attentions (:obj:`bool`, `optional`): + Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned + tensors for more detail. + output_hidden_states (:obj:`bool`, `optional`): + Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for + more detail. + return_dict (:obj:`bool`, `optional`): Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. """ @@ -405,6 +410,8 @@ def init_weights(self): Indices can be obtained using :class:`~transformers.DPRReaderTokenizer`. See this class documentation for more details. + + `What are input IDs? <../glossary.html#input-ids>`__ attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(n_passages, sequence_length)`, `optional`): Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py index cc595b85cda3..b060fbb28618 100644 --- a/src/transformers/models/dpr/modeling_tf_dpr.py +++ b/src/transformers/models/dpr/modeling_tf_dpr.py @@ -486,15 +486,17 @@ def serving(self, inputs): (a) For sequence pairs (for a pair title+text for example): - ``tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]`` + :: - ``token_type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1`` + tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP] + token_type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 (b) For single sequences (for a question for example): - ``tokens: [CLS] the dog is hairy . [SEP]`` + :: - ``token_type_ids: 0 0 0 0 0 0 0`` + tokens: [CLS] the dog is hairy . [SEP] + token_type_ids: 0 0 0 0 0 0 0 DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right rather than the left. @@ -502,6 +504,8 @@ def serving(self, inputs): Indices can be obtained using :class:`~transformers.DPRTokenizer`. See :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for details. + + `What are input IDs? <../glossary.html#input-ids>`__ attention_mask (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index fbb9ca330a68..3501720060a4 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -412,6 +412,8 @@ def from_pretrained_question_encoder_generator( Indices of input sequence tokens in the vocabulary. :class:`~transformers.RagConfig`, used to initialize the model, specifies which generator to use, it also specifies a compatible generator tokenizer. Use that tokenizer class to obtain the indices. + + `What are input IDs? 
<../glossary.html#input-ids>`__ attention_mask (:obj:`torch.Tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: diff --git a/utils/style_doc.py b/utils/style_doc.py index 45ec776e7655..57179e6347e9 100644 --- a/utils/style_doc.py +++ b/utils/style_doc.py @@ -135,6 +135,14 @@ def init_in_block(self, text): """ return SpecialBlock.NOT_SPECIAL + def end_of_special_style(self, line): + """ + Sets back the `in_block` attribute to `NOT_SPECIAL`. + + Useful for some docstrings where we may have to go back to `ARG_LIST` instead. + """ + self.in_block = SpecialBlock.NOT_SPECIAL + def style_paragraph(self, paragraph, max_len, no_style=False, min_indent=None): """ Style `paragraph` (a list of lines) by making sure no line goes over `max_len`, except if the `no_style` flag @@ -220,6 +228,7 @@ def style(self, text, max_len=119, min_indent=None): new_lines = [] paragraph = [] self.current_indent = "" + self.previous_indent = None # If one of those is True, the paragraph should not be touched (code samples, lists...) no_style = False no_style_next = False @@ -251,7 +260,7 @@ def style(self, text, max_len=119, min_indent=None): self.current_indent = indent elif not indent.startswith(self.current_indent): # If not, we are leaving the block when we unindent. - self.in_block = SpecialBlock.NOT_SPECIAL + self.end_of_special_style(paragraph[0]) if self.is_special_block(paragraph[0]): # Maybe we are starting a special block. @@ -326,6 +335,8 @@ def is_comment_or_textual_block(self, line): def is_special_block(self, line): if self.is_no_style_block(line): + if self.previous_indent is None and self.in_block == SpecialBlock.ARG_LIST: + self.previous_indent = self.current_indent self.in_block = SpecialBlock.NO_STYLE return True if _re_arg_def.search(line) is not None: @@ -333,6 +344,14 @@ def is_special_block(self, line): return True return False + def end_of_special_style(self, line): + if self.previous_indent is not None and line.startswith(self.previous_indent): + self.in_block = SpecialBlock.ARG_LIST + self.current_indent = self.previous_indent + else: + self.in_block = SpecialBlock.NOT_SPECIAL + self.previous_indent = None + def init_in_block(self, text): lines = text.split("\n") while len(lines) > 0 and len(lines[0]) == 0:
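
The docstring changes above repeatedly point readers at :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` as the way to obtain the :obj:`input_ids` and :obj:`attention_mask` being documented. As a quick illustration of that relationship (not part of the patch; the ``bert-base-uncased`` checkpoint is only an arbitrary, illustrative choice), a minimal sketch::

    from transformers import AutoTokenizer

    # Illustrative checkpoint; any BERT-like tokenizer behaves the same way here.
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

    # `encode` (the method these docs now surface) returns only the list of token ids,
    # with special tokens such as [CLS]/[SEP] added by default.
    input_ids = tokenizer.encode("Hello world")
    print(tokenizer.convert_ids_to_tokens(input_ids))

    # `__call__` returns a BatchEncoding holding everything the docstrings above describe:
    # `input_ids`, `attention_mask` (1 = not masked, 0 = padding) and, for BERT-like
    # tokenizers, `token_type_ids`.
    batch = tokenizer(["Hello world", "A somewhat longer second sentence"], padding=True)
    print(batch["input_ids"])
    print(batch["attention_mask"])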
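
Similarly, the sequence-pair example reformatted as a ``::`` literal block in ``modeling_tf_dpr.py`` describes how :obj:`token_type_ids` split the two segments. A hedged sketch of how a BERT-style tokenizer produces them (again assuming an illustrative checkpoint, not something prescribed by the patch)::

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # illustrative checkpoint
    pair = tokenizer("is this jacksonville ?", "no it is not .")

    # Expected to reproduce the `tokens:` line from the docstring:
    # [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
    print(tokenizer.convert_ids_to_tokens(pair["input_ids"]))

    # 0 for the first segment (including [CLS] and the first [SEP]), 1 for the second,
    # matching the `token_type_ids:` line in the docstring.
    print(pair["token_type_ids"])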