Minor fixes
lgai-exaone committed Nov 12, 2024
1 parent 77fd69f commit 12a9bfa
Showing 3 changed files with 4 additions and 8 deletions.
2 changes: 2 additions & 0 deletions docs/source/en/perf_infer_gpu_one.md
@@ -56,6 +56,7 @@ FlashAttention-2 is currently supported for the following architectures:
 * [GraniteMoe](https://huggingface.co/docs/transformers/model_doc/granitemoe#transformers.GraniteMoeModel)
 * [Idefics2](https://huggingface.co/docs/transformers/model_doc/idefics2#transformers.Idefics2Model)
 * [Idefics3](https://huggingface.co/docs/transformers/model_doc/idefics3#transformers.Idefics3Model)
+* [Exaone](https://huggingface.co/docs/transformers/model_doc/exaone#transformers.ExaoneModel)
 * [Falcon](https://huggingface.co/docs/transformers/model_doc/falcon#transformers.FalconModel)
 * [JetMoe](https://huggingface.co/docs/transformers/model_doc/jetmoe#transformers.JetMoeModel)
 * [Jamba](https://huggingface.co/docs/transformers/model_doc/jamba#transformers.JambaModel)
@@ -231,6 +232,7 @@ For now, Transformers supports SDPA inference and training for the following architectures:
 * [DistilBert](https://huggingface.co/docs/transformers/model_doc/distilbert#transformers.DistilBertModel)
 * [Dpr](https://huggingface.co/docs/transformers/model_doc/dpr#transformers.DprReader)
 * [EncoderDecoder](https://huggingface.co/docs/transformers/model_doc/encoder_decoder#transformers.EncoderDecoderModel)
+* [Exaone](https://huggingface.co/docs/transformers/model_doc/exaone#transformers.ExaoneModel)
 * [Falcon](https://huggingface.co/docs/transformers/model_doc/falcon#transformers.FalconModel)
 * [Gemma](https://huggingface.co/docs/transformers/model_doc/gemma#transformers.GemmaModel)
 * [Gemma2](https://huggingface.co/docs/transformers/model_doc/gemma2#transformers.Gemma2Model)
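Both documentation entries are about picking an attention backend at load time. A minimal sketch of how the new Exaone entries would be exercised, assuming the `LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct` checkpoint and that the Exaone classes are available in this transformers tree (or via `trust_remote_code=True`):

```python
# Hedged sketch: load EXAONE with FlashAttention-2 or SDPA.
# The checkpoint id and the trust_remote_code flag are assumptions about the user's setup.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,               # FlashAttention-2 requires fp16/bf16 weights
    attn_implementation="flash_attention_2",  # or "sdpa"
    trust_remote_code=True,
)
```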
7 changes: 1 addition & 6 deletions src/transformers/models/exaone/configuration_exaone.py
@@ -114,8 +114,6 @@ class ExaoneConfig(PretrainedConfig):
             Beginning of stream token id.
         eos_token_id (`int`, *optional*, defaults to 2):
             End of stream token id.
-        tie_word_embeddings (`bool`, *optional*, defaults to `True`):
-            Whether to tie weight embeddings
     Example:
@@ -155,7 +153,6 @@ def __init__(
         use_cache=True,
         bos_token_id=0,
         eos_token_id=2,
-        tie_word_embeddings=True,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -183,6 +180,4 @@ def __init__(
         self.bos_token_id = bos_token_id
         self.eos_token_id = eos_token_id
 
-        super().__init__(
-            bos_token_id=bos_token_id, eos_token_id=eos_token_id, tie_word_embeddings=tie_word_embeddings, **kwargs
-        )
+        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
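Dropping the explicit argument does not remove the option: `PretrainedConfig` still consumes `tie_word_embeddings` from `**kwargs`, so callers can set it exactly as before. A small sketch, assuming this transformers tree with the exaone package importable:

```python
from transformers.models.exaone.configuration_exaone import ExaoneConfig

# PretrainedConfig pops tie_word_embeddings from **kwargs (default True),
# so only the duplicated explicit parameter was dropped in this commit.
config_default = ExaoneConfig()
config_untied = ExaoneConfig(tie_word_embeddings=False)

print(config_default.tie_word_embeddings)  # True
print(config_untied.tie_word_embeddings)   # False
```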
3 changes: 1 addition & 2 deletions src/transformers/models/exaone/modeling_exaone.py
@@ -228,6 +228,7 @@ def __init__(self, config: ExaoneConfig, device=None):
             self.rope_type = config.rope_scaling.get("rope_type", config.rope_scaling.get("type"))
         else:
             self.rope_type = "default"
+        self.rope_theta = config.rope_theta
         self.max_seq_len = config.max_position_embeddings
         self.original_max_seq_len = config.max_position_embeddings
 
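The stored `rope_theta` is the rotary-embedding base frequency. As a standalone, hedged illustration (not the module's actual code), the standard RoPE inverse-frequency computation it typically feeds looks like this:

```python
import torch

def rope_inverse_frequencies(head_dim: int, rope_theta: float = 10000.0) -> torch.Tensor:
    """Standard RoPE inverse frequencies: rope_theta**(-2i/head_dim) for i = 0 .. head_dim/2 - 1."""
    exponents = torch.arange(0, head_dim, 2, dtype=torch.float32) / head_dim
    return 1.0 / (rope_theta ** exponents)

# A larger rope_theta slows the rotation, which is how long-context variants stretch positions.
print(rope_inverse_frequencies(head_dim=8, rope_theta=10000.0))
```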
@@ -1307,8 +1308,6 @@ def forward(
     EXAONE_START_DOCSTRING,
 )
 class ExaoneForQuestionAnswering(ExaonePreTrainedModel):
-    _keys_to_ignore_on_load_missing = ["lm_head.weight"]
-
     def __init__(self, config):
         super().__init__(config)
         self.num_labels = config.num_labels
