From f868eb843624911656dd57585dc21ce7e5464ec6 Mon Sep 17 00:00:00 2001
From: Shuai Zheng
Date: Thu, 19 Jan 2023 11:14:43 -0800
Subject: [PATCH] [Bugfix] Fix transformers import order in megatron scripts
 (#5)

---
 examples/albert/megatron_hf.py  | 4 ++--
 examples/bert/megatron_hf.py    | 4 ++--
 examples/gpt/megatron_hf.py     | 5 +++--
 examples/opt/megatron_hf.py     | 5 +++--
 examples/roberta/megatron_hf.py | 4 ++--
 examples/t5/megatron_hf.py      | 4 ++--
 6 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/examples/albert/megatron_hf.py b/examples/albert/megatron_hf.py
index 9d32fb2f..6e1d1f8d 100644
--- a/examples/albert/megatron_hf.py
+++ b/examples/albert/megatron_hf.py
@@ -12,6 +12,8 @@
 import torch
 import torch.nn.functional as F
 
+from transformers import AutoConfig, AlbertModel
+
 from megatron import get_args
 from megatron import print_rank_0
 from megatron import get_timers
@@ -35,8 +37,6 @@ def get_model(
     impl="slapo",
     delay_init=True,
 ):
-    from transformers import AutoConfig, AlbertModel
-
     config = AutoConfig.from_pretrained(model_name)
     if padded_vocab_size is not None:
         config.vocab_size = padded_vocab_size
diff --git a/examples/bert/megatron_hf.py b/examples/bert/megatron_hf.py
index d36e44ed..a21cdce2 100644
--- a/examples/bert/megatron_hf.py
+++ b/examples/bert/megatron_hf.py
@@ -12,6 +12,8 @@
 import torch
 import torch.nn.functional as F
 
+from transformers import AutoConfig, BertModel
+
 from megatron import get_args
 from megatron import print_rank_0
 from megatron import get_timers
@@ -35,8 +37,6 @@ def get_model(
     impl="slapo",
     delay_init=True,
 ):
-    from transformers import AutoConfig, BertModel
-
     config = AutoConfig.from_pretrained(model_name)
     if padded_vocab_size is not None:
         config.vocab_size = padded_vocab_size
diff --git a/examples/gpt/megatron_hf.py b/examples/gpt/megatron_hf.py
index ee60c028..85175e9f 100644
--- a/examples/gpt/megatron_hf.py
+++ b/examples/gpt/megatron_hf.py
@@ -7,6 +7,9 @@
 
 import os
 import torch
+
+from transformers import AutoConfig, GPTNeoModel
+
 from functools import partial
 from megatron import get_args
 from megatron import print_rank_0
@@ -30,8 +33,6 @@ def get_model(
     impl="slapo",
     delay_init=True,
 ):
-    from transformers import AutoConfig, GPTNeoModel
-
     config = AutoConfig.from_pretrained(model_name)
     if padded_vocab_size is not None:
         config.vocab_size = padded_vocab_size
diff --git a/examples/opt/megatron_hf.py b/examples/opt/megatron_hf.py
index e7ab09de..18d53fec 100644
--- a/examples/opt/megatron_hf.py
+++ b/examples/opt/megatron_hf.py
@@ -7,6 +7,9 @@
 
 import os
 import torch
+
+from transformers import AutoConfig, OPTModel
+
 from functools import partial
 from megatron import get_args
 from megatron import print_rank_0
@@ -30,8 +33,6 @@ def get_model(
     impl="slapo",
     delay_init=True,
 ):
-    from transformers import AutoConfig, OPTModel
-
     config = AutoConfig.from_pretrained(model_name)
     if padded_vocab_size is not None:
         config.vocab_size = padded_vocab_size
diff --git a/examples/roberta/megatron_hf.py b/examples/roberta/megatron_hf.py
index d21d0c5b..5f0f85a5 100644
--- a/examples/roberta/megatron_hf.py
+++ b/examples/roberta/megatron_hf.py
@@ -9,6 +9,8 @@
 import torch
 import torch.nn.functional as F
 
+from transformers import AutoConfig, RobertaModel
+
 from megatron import get_args
 from megatron import print_rank_0
 from megatron import get_timers
@@ -32,8 +34,6 @@ def get_model(
     impl="slapo",
     delay_init=True,
 ):
-    from transformers import AutoConfig, RobertaModel
-
     config = AutoConfig.from_pretrained(model_name)
     if padded_vocab_size is not None:
         config.vocab_size = padded_vocab_size
diff --git a/examples/t5/megatron_hf.py b/examples/t5/megatron_hf.py
index 7af8f5d4..028dba63 100644
--- a/examples/t5/megatron_hf.py
+++ b/examples/t5/megatron_hf.py
@@ -10,6 +10,8 @@
 
 import torch
 
+from transformers import AutoConfig, T5Model
+
 from megatron import get_args, get_timers, mpu, print_rank_0
 from megatron.data.dataset_utils import build_train_valid_test_datasets
 from megatron.model import ModelType
@@ -58,8 +60,6 @@ def get_model(
     impl="slapo",
     delay_init=True,
 ):
-    from transformers import AutoConfig, T5Model
-
     config = AutoConfig.from_pretrained(model_name)
     config.vocab_size = padded_vocab_size
     config.use_cache = False
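
For readers skimming the patch: every file receives the same two-part change. The `from transformers import ...` statement is hoisted out of `get_model` to module level, placed after the torch imports and before the megatron imports, and the now-redundant function-local import (plus its trailing blank line) is removed. Below is a minimal sketch of the resulting layout, using the ALBERT variant; the `get_model` signature is abbreviated, the body is trimmed, and the megatron imports are left out so the snippet stands alone, so treat it as an illustration rather than the actual file contents.

import torch

# Hoisted to module level by the patch, ahead of the megatron imports
# (the megatron imports are omitted here so the sketch runs without a
# Megatron-LM install).
from transformers import AutoConfig, AlbertModel


def get_model(model_name, padded_vocab_size=None):
    # The function-local "from transformers import AutoConfig, AlbertModel"
    # removed by the patch is no longer needed; the module-level import
    # above is used instead.
    config = AutoConfig.from_pretrained(model_name)
    if padded_vocab_size is not None:
        config.vocab_size = padded_vocab_size
    return AlbertModel(config)

Calling get_model("albert-base-v2") then builds the model entirely from the module-level imports, which is the layout the patch applies uniformly across all six scripts.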