From ab6b13d98f1803cd77b2baa0982a98b58519400c Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Wed, 1 Feb 2023 16:14:17 +0000
Subject: [PATCH 1/6] mvp

---
 src/transformers/generation/utils.py | 43 +++++++---------------------
 1 file changed, 10 insertions(+), 33 deletions(-)

diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py
index d18a82d17fef..15c7d88e10cf 100644
--- a/src/transformers/generation/utils.py
+++ b/src/transformers/generation/utils.py
@@ -519,47 +519,18 @@ def _prepare_model_inputs(
         inputs_kwarg = model_kwargs.pop(input_name, None)
         if inputs_kwarg is not None and inputs is not None:
             raise ValueError(
-                f"`inputs`: {inputs}` were passed alongside "
-                f"{input_name} which is not allowed."
+                f"`inputs`: {inputs} were passed alongside {input_name} which is not allowed. "
                 f"Make sure to either pass {inputs} or {input_name}=..."
             )
         elif inputs_kwarg is not None:
             inputs = inputs_kwarg
 
-        # 3. models with `input_ids` can also make use of `inputs_embeds`
-        if self._can_retrieve_inputs_from_name(inputs, "inputs_embeds", model_kwargs):
-            inputs, input_name = model_kwargs["inputs_embeds"], "inputs_embeds"
-
-        # 4. Only encoder-decoder models can have non `input_ids` input format
-        if not self.config.is_encoder_decoder and input_name != "input_ids":
-            raise ValueError(
-                f"If {input_name} is passed as model-specific keyword "
-                "input then model has to be an encoder-decoder and not a "
-                f"{self.__class__.__name__}."
-            )
-
-        # 5. if `inputs` is still None, try to create `input_ids` from BOS token
+        # 3. if `inputs` is still None, try to create `input_ids` from BOS token
         if inputs is None:
             inputs = self._prepare_input_ids_for_generation(bos_token_id, model_kwargs.get("encoder_outputs"))
 
         return inputs, input_name, model_kwargs
 
-    def _can_retrieve_inputs_from_name(
-        self, inputs: Optional[torch.Tensor], name: str, model_kwargs: Dict[str, torch.Tensor]
-    ) -> torch.Tensor:
-        """
-        If `inputs` is None and `name` is in both forward function and keyword arguments, then inputs can be retrieved
-        from name
-        """
-        can_retrieve_inputs = model_kwargs.get(name, None) is not None and name in set(
-            inspect.signature(self.forward).parameters.keys()
-        )
-
-        if can_retrieve_inputs and inputs is not None:
-            raise ValueError(f"Cannot only pass one of {name} and {self.main_input_name}")
-
-        return can_retrieve_inputs
-
     def adjust_logits_during_generation(self, logits: torch.FloatTensor, **kwargs) -> torch.FloatTensor:
         """
         Implement in subclasses of [`PreTrainedModel`] for custom behavior to adjust the logits in the generate method.
@@ -611,9 +582,15 @@ def _prepare_encoder_decoder_kwargs_for_generation(
         }
 
         # 3. make sure that encoder returns `ModelOutput`
-        model_input_name = model_input_name if model_input_name is not None else self.main_input_name
+        accepts_inputs_embeds = "inputs_embeds" in set(inspect.signature(self.forward).parameters.keys())
+        has_inputs_embeds = "inputs_embeds" in model_kwargs
+        if accepts_inputs_embeds and has_inputs_embeds:
+            encoder_kwargs["inputs_embeds"] = model_kwargs["inputs_embeds"]
+        else:
+            model_input_name = model_input_name if model_input_name is not None else self.main_input_name
+            encoder_kwargs[model_input_name] = inputs_tensor
+
         encoder_kwargs["return_dict"] = True
-        encoder_kwargs[model_input_name] = inputs_tensor
         model_kwargs["encoder_outputs"]: ModelOutput = encoder(**encoder_kwargs)
 
         return model_kwargs

From 7cf78029ad172c382a70d5c1c5b61b00790ea4dc Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Wed, 1 Feb 2023 20:09:35 +0000
Subject: [PATCH 2/6] gpt2 accepts input_embeds

---
 src/transformers/models/gpt2/modeling_gpt2.py | 25 +++++++++++++------
 tests/generation/test_utils.py                | 22 ++++++++++++++++
 2 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py
index 5fe33bbca509..2d2f23223890 100644
--- a/src/transformers/models/gpt2/modeling_gpt2.py
+++ b/src/transformers/models/gpt2/modeling_gpt2.py
@@ -1000,14 +1000,23 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwarg
             position_ids = position_ids[:, -1].unsqueeze(-1)
         else:
             position_ids = None
-        return {
-            "input_ids": input_ids,
-            "past_key_values": past_key_values,
-            "use_cache": kwargs.get("use_cache"),
-            "position_ids": position_ids,
-            "attention_mask": attention_mask,
-            "token_type_ids": token_type_ids,
-        }
+
+        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
+        if "inputs_embeds" in kwargs and past_key_values is None:
+            model_inputs = {"inputs_embeds": kwargs.get("inputs_embeds", None)}
+        else:
+            model_inputs = {"input_ids": input_ids}
+
+        model_inputs.update(
+            {
+                "past_key_values": past_key_values,
+                "use_cache": kwargs.get("use_cache"),
+                "position_ids": position_ids,
+                "attention_mask": attention_mask,
+                "token_type_ids": token_type_ids,
+            }
+        )
+        return model_inputs
 
     @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py
index 1546d14b438a..bc586f253f8f 100644
--- a/tests/generation/test_utils.py
+++ b/tests/generation/test_utils.py
@@ -3128,3 +3128,25 @@ def test_eos_token_id_int_and_list_beam_search(self):
         eos_token_id = [873]
         generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
         self.assertTrue(expectation == len(generated_tokens[0]))
+
+    def test_generate_from_input_embeds_decoder_only(self):
+        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
+        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
+
+        text = "Hello world"
+        input_ids = tokenizer.encode(text, return_tensors="pt")
+
+        # Traditional way of generating text
+        outputs_from_ids = model.generate(input_ids)
+
+        # Same thing, but from input embeddings
+        inputs_embeds = model.transformer.wte(input_ids)
+        outputs_from_embeds = model.generate(input_ids, inputs_embeds=inputs_embeds)
+        self.assertListEqual(outputs_from_ids.tolist(), outputs_from_embeds.tolist())
+
+        # But if we pass different inputs_embeds, we should get different outputs
+        torch.manual_seed(0)
+        random_embeds = torch.rand_like(inputs_embeds)
+        outputs_from_rand_embeds = model.generate(input_ids, inputs_embeds=random_embeds)
+        with self.assertRaises(AssertionError):
+            self.assertListEqual(outputs_from_rand_embeds.tolist(), outputs_from_embeds.tolist())

From cb1bb5abde6c3a3924772afd53eb5a9318be1dd8 Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Wed, 1 Feb 2023 20:13:07 +0000
Subject: [PATCH 3/6] remove unused test

---
 src/transformers/models/gpt2/modeling_gpt2.py |  6 +++---
 tests/generation/test_utils.py                | 12 +-----------
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py
index 2d2f23223890..1a7ba62c4146 100644
--- a/src/transformers/models/gpt2/modeling_gpt2.py
+++ b/src/transformers/models/gpt2/modeling_gpt2.py
@@ -981,7 +981,7 @@ def get_output_embeddings(self):
     def set_output_embeddings(self, new_embeddings):
         self.lm_head = new_embeddings
 
-    def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs):
+    def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs):
         token_type_ids = kwargs.get("token_type_ids", None)
         # only last token for inputs_ids if past is defined in kwargs
         if past_key_values:
@@ -1002,8 +1002,8 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwarg
             position_ids = None
 
         # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
-        if "inputs_embeds" in kwargs and past_key_values is None:
-            model_inputs = {"inputs_embeds": kwargs.get("inputs_embeds", None)}
+        if inputs_embeds is not None and past_key_values is None:
+            model_inputs = {"inputs_embeds": inputs_embeds}
         else:
             model_inputs = {"input_ids": input_ids}
 
diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py
index bc586f253f8f..8558525b2be4 100644
--- a/tests/generation/test_utils.py
+++ b/tests/generation/test_utils.py
@@ -2359,17 +2359,6 @@ def test_encoder_decoder_generate_attention_mask(self):
 
         self.assertTrue(diff < 1e-4)
 
-    def test_decoder_generate_with_inputs_embeds(self):
-        article = """I need input_ids to generate"""
-        tokenizer = GPT2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
-        model = GPT2LMHeadModel.from_pretrained("hf-internal-testing/tiny-random-gpt2", max_length=5).to(torch_device)
-        input_ids = tokenizer(article, return_tensors="pt").input_ids.to(torch_device)
-        inputs_embeds = model.get_input_embeddings()(input_ids)
-
-        # cannot generate from `inputs_embeds` for decoder only
-        with self.assertRaises(ValueError):
-            model.generate(inputs_embeds=inputs_embeds)
-
     def test_generate_input_ids_as_kwarg(self):
         article = """I need input_ids to generate"""
         tokenizer = GPT2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
@@ -3130,6 +3119,7 @@ def test_eos_token_id_int_and_list_beam_search(self):
         self.assertTrue(expectation == len(generated_tokens[0]))
 
     def test_generate_from_input_embeds_decoder_only(self):
+        # Note: the model must support generation from input embeddings
         model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
         tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
 
From 53649c5c053ba087c748ad0a2dd43a6227b90279 Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Wed, 1 Feb 2023 20:35:44 +0000
Subject: [PATCH 4/6] throw informative exception on other models

---
 src/transformers/generation/utils.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py
index 15c7d88e10cf..8aa365cc22d3 100644
--- a/src/transformers/generation/utils.py
+++ b/src/transformers/generation/utils.py
@@ -525,7 +525,20 @@ def _prepare_model_inputs(
         elif inputs_kwarg is not None:
             inputs = inputs_kwarg
 
-        # 3. if `inputs` is still None, try to create `input_ids` from BOS token
+        # 3. Text decoder-only models should complain if the user attempts to pass `inputs_embeds`, but the model
+        # doesn't have its forwarding implemented
+        if not self.config.is_encoder_decoder and input_name == "input_ids" and "inputs_embeds" in model_kwargs:
+            has_inputs_embeds_forwarding = "inputs_embeds" in set(
+                inspect.signature(self.prepare_inputs_for_generation).parameters.keys()
+            )
+            if not has_inputs_embeds_forwarding:
+                raise ValueError(
+                    f"You passed `inputs_embeds` to `.generate()`, but the model class {self.__class__.__name__} "
+                    "doesn't have its forwarding implemented. See the GPT2 implementation for an example "
+                    "(https://github.com/huggingface/transformers/pull/21405), and feel free to open a PR with it :)"
+                )
+
+        # 4. if `inputs` is still None, try to create `input_ids` from BOS token
         if inputs is None:
             inputs = self._prepare_input_ids_for_generation(bos_token_id, model_kwargs.get("encoder_outputs"))
 

From c85df70383d171e9c4285be59c03b1a5e67eb73a Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Wed, 1 Feb 2023 21:07:20 +0000
Subject: [PATCH 5/6] working encoder-decoder from inputs_embeds

---
 src/transformers/generation/utils.py | 41 +++++++++++++++-------------
 tests/generation/test_utils.py       |  8 ------
 2 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py
index 8aa365cc22d3..db194ab91a1c 100644
--- a/src/transformers/generation/utils.py
+++ b/src/transformers/generation/utils.py
@@ -525,18 +525,27 @@ def _prepare_model_inputs(
         elif inputs_kwarg is not None:
             inputs = inputs_kwarg
 
-        # 3. Text decoder-only models should complain if the user attempts to pass `inputs_embeds`, but the model
-        # doesn't have its forwarding implemented
-        if not self.config.is_encoder_decoder and input_name == "input_ids" and "inputs_embeds" in model_kwargs:
-            has_inputs_embeds_forwarding = "inputs_embeds" in set(
-                inspect.signature(self.prepare_inputs_for_generation).parameters.keys()
-            )
-            if not has_inputs_embeds_forwarding:
-                raise ValueError(
-                    f"You passed `inputs_embeds` to `.generate()`, but the model class {self.__class__.__name__} "
-                    "doesn't have its forwarding implemented. See the GPT2 implementation for an example "
-                    "(https://github.com/huggingface/transformers/pull/21405), and feel free to open a PR with it :)"
+        # 3. In the presence of `inputs_embeds` for text models:
+        # - decoder-only models should complain if the user attempts to pass `inputs_embeds`, but the model
+        # doesn't have its forwarding implemented. `inputs_embeds` is kept in `model_kwargs` and can coexist with
+        # input_ids (`inputs_embeds` will be used in the 1st generation step, as opposed to `input_ids`)
+        # - encoder-decoder models should complain if the user attempts to pass `inputs_embeds` and `input_ids`, and
+        # pull the former to inputs. It will be used in place of `input_ids` to get the encoder hidden states.
+        if input_name == "input_ids" and "inputs_embeds" in model_kwargs:
+            if not self.config.is_encoder_decoder:
+                has_inputs_embeds_forwarding = "inputs_embeds" in set(
+                    inspect.signature(self.prepare_inputs_for_generation).parameters.keys()
                 )
+                if not has_inputs_embeds_forwarding:
+                    raise ValueError(
+                        f"You passed `inputs_embeds` to `.generate()`, but the model class {self.__class__.__name__} "
+                        "doesn't have its forwarding implemented. See the GPT2 implementation for an example "
+                        "(https://github.com/huggingface/transformers/pull/21405), and feel free to open a PR with it!"
+                    )
+            else:
+                if inputs is not None:
+                    raise ValueError("You passed `inputs_embeds` and `input_ids` to `.generate()`. Please pick one.")
+                inputs, input_name = model_kwargs["inputs_embeds"], "inputs_embeds"
 
         # 4. if `inputs` is still None, try to create `input_ids` from BOS token
         if inputs is None:
@@ -595,15 +604,9 @@ def _prepare_encoder_decoder_kwargs_for_generation(
         }
 
         # 3. make sure that encoder returns `ModelOutput`
-        accepts_inputs_embeds = "inputs_embeds" in set(inspect.signature(self.forward).parameters.keys())
-        has_inputs_embeds = "inputs_embeds" in model_kwargs
-        if accepts_inputs_embeds and has_inputs_embeds:
-            encoder_kwargs["inputs_embeds"] = model_kwargs["inputs_embeds"]
-        else:
-            model_input_name = model_input_name if model_input_name is not None else self.main_input_name
-            encoder_kwargs[model_input_name] = inputs_tensor
-
+        model_input_name = model_input_name if model_input_name is not None else self.main_input_name
         encoder_kwargs["return_dict"] = True
+        encoder_kwargs[model_input_name] = inputs_tensor
         model_kwargs["encoder_outputs"]: ModelOutput = encoder(**encoder_kwargs)
 
         return model_kwargs
diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py
index 8558525b2be4..a63457c02137 100644
--- a/tests/generation/test_utils.py
+++ b/tests/generation/test_utils.py
@@ -2404,14 +2404,6 @@ def test_generate_inputs_and_encoder_kwargs(self):
         with self.assertRaises(ValueError):
             model.generate(input_ids, input_ids=input_ids)
 
-    def test_generate_too_many_encoder_kwargs(self):
-        article = """I need input_ids to generate"""
-        tokenizer = GPT2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
-        model = GPT2LMHeadModel.from_pretrained("hf-internal-testing/tiny-random-gpt2", max_length=10).to(torch_device)
-        input_ids = tokenizer(article, return_tensors="pt").input_ids.to(torch_device)
-        with self.assertRaises(ValueError):
-            model.generate(input_ids=input_ids, inputs_embeds=input_ids)
-
     def test_generate_input_values_as_encoder_kwarg(self):
         input_values = floats_tensor((2, 250))
         model = SpeechEncoderDecoderModel.from_pretrained("hf-internal-testing/tiny-random-speech-encoder-decoder")

From 9846b4567661c58e9aa9f4364becdf1e34f7fdf6 Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Wed, 1 Feb 2023 21:17:54 +0000
Subject: [PATCH 6/6] re add test

---
 tests/generation/test_utils.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py
index a63457c02137..7a57a06afe54 100644
--- a/tests/generation/test_utils.py
+++ b/tests/generation/test_utils.py
@@ -2404,6 +2404,16 @@ def test_generate_inputs_and_encoder_kwargs(self):
         with self.assertRaises(ValueError):
             model.generate(input_ids, input_ids=input_ids)
 
+    def test_generate_too_many_encoder_kwargs(self):
+        article = """I need input_ids to generate"""
+        tokenizer = BartTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")
+        model = BartForConditionalGeneration.from_pretrained("hf-internal-testing/tiny-random-bart", max_length=10).to(
+            torch_device
+        )
+        input_ids = tokenizer(article, return_tensors="pt").input_ids.to(torch_device)
+        with self.assertRaises(ValueError):
+            model.generate(input_ids=input_ids, inputs_embeds=input_ids)
+
     def test_generate_input_values_as_encoder_kwarg(self):
         input_values = floats_tensor((2, 250))
         model = SpeechEncoderDecoderModel.from_pretrained("hf-internal-testing/tiny-random-speech-encoder-decoder")
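
For reference, a minimal end-to-end sketch of the decoder-only usage this series enables, assembled from the new `test_generate_from_input_embeds_decoder_only` test above (the tiny GPT-2 checkpoint and the `model.transformer.wte` embedding lookup come straight from that test, which passes `input_ids` alongside `inputs_embeds`):

from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")

input_ids = tokenizer.encode("Hello world", return_tensors="pt")

# Embed the prompt manually; for GPT-2 the input embedding matrix is `transformer.wte`.
inputs_embeds = model.transformer.wte(input_ids)

# Per the new `prepare_inputs_for_generation` above, `inputs_embeds` is used for the
# first forward pass (when `past_key_values` is not yet set) and `input_ids` for every
# later step, so the two calls should produce identical sequences.
outputs_from_ids = model.generate(input_ids)
outputs_from_embeds = model.generate(input_ids, inputs_embeds=inputs_embeds)
assert outputs_from_ids.tolist() == outputs_from_embeds.tolist()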