Skip to content

Commit

Permalink
Improve pytorch examples for fp16 (#9796)
Browse files
* Pad to 8x for fp16 multiple choice example (#9752)

* Pad to 8x for fp16 squad trainer example (#9752)

* Pad to 8x for fp16 ner example (#9752)

* Pad to 8x for fp16 swag example (#9752)

* Pad to 8x for fp16 qa beam search example (#9752)

* Pad to 8x for fp16 qa example (#9752)

* Pad to 8x for fp16 seq2seq example (#9752)

* Pad to 8x for fp16 glue example (#9752)

* Pad to 8x for fp16 new ner example (#9752)

* Update script template (#9752)

* Update examples/multiple-choice/run_swag.py

Co-authored-by: Sylvain Gugger <[email protected]>

* Update examples/question-answering/run_qa.py

Co-authored-by: Sylvain Gugger <[email protected]>

* Update examples/question-answering/run_qa_beam_search.py

Co-authored-by: Sylvain Gugger <[email protected]>

* Improve code quality (#9752)

Co-authored-by: Sylvain Gugger <[email protected]>
  • Loading branch information
ak314 and sgugger authored Jan 26, 2021
1 parent 781e4b1 commit 10e5f28
Show file tree
Hide file tree
Showing 10 changed files with 53 additions and 9 deletions.
5 changes: 5 additions & 0 deletions examples/legacy/multiple_choice/run_multiple_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
AutoConfig,
AutoModelForMultipleChoice,
AutoTokenizer,
DataCollatorWithPadding,
EvalPrediction,
HfArgumentParser,
Trainer,
Expand Down Expand Up @@ -188,13 +189,17 @@ def compute_metrics(p: EvalPrediction) -> Dict:
preds = np.argmax(p.predictions, axis=1)
return {"acc": simple_accuracy(preds, p.label_ids)}

# Data collator
data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) if training_args.fp16 else None

# Initialize our Trainer
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
compute_metrics=compute_metrics,
data_collator=data_collator,
)

# Training
Expand Down
13 changes: 12 additions & 1 deletion examples/legacy/question-answering/run_squad_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,14 @@
from typing import Optional

import transformers
from transformers import AutoConfig, AutoModelForQuestionAnswering, AutoTokenizer, HfArgumentParser, SquadDataset
from transformers import (
AutoConfig,
AutoModelForQuestionAnswering,
AutoTokenizer,
DataCollatorWithPadding,
HfArgumentParser,
SquadDataset,
)
from transformers import SquadDataTrainingArguments as DataTrainingArguments
from transformers import Trainer, TrainingArguments
from transformers.trainer_utils import is_main_process
Expand Down Expand Up @@ -145,12 +152,16 @@ def main():
else None
)

# Data collator
data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) if training_args.fp16 else None

# Initialize our Trainer
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
data_collator=data_collator,
)

# Training
Expand Down
5 changes: 5 additions & 0 deletions examples/legacy/token-classification/run_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
AutoConfig,
AutoModelForTokenClassification,
AutoTokenizer,
DataCollatorWithPadding,
EvalPrediction,
HfArgumentParser,
Trainer,
Expand Down Expand Up @@ -237,13 +238,17 @@ def compute_metrics(p: EvalPrediction) -> Dict:
"f1": f1_score(out_label_list, preds_list),
}

# Data collator
data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) if training_args.fp16 else None

# Initialize our Trainer
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
compute_metrics=compute_metrics,
data_collator=data_collator,
)

# Training
Expand Down
4 changes: 3 additions & 1 deletion examples/multiple-choice/run_swag.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,9 @@ def preprocess_function(examples):

# Data collator
data_collator = (
default_data_collator if data_args.pad_to_max_length else DataCollatorForMultipleChoice(tokenizer=tokenizer)
default_data_collator
if data_args.pad_to_max_length
else DataCollatorForMultipleChoice(tokenizer=tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)
)

# Metric
Expand Down
6 changes: 5 additions & 1 deletion examples/question-answering/run_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,11 @@ def prepare_validation_features(examples):
# Data collator
# We have already padded to max length if the corresponding flag is True, otherwise we need to pad in the data
# collator.
data_collator = default_data_collator if data_args.pad_to_max_length else DataCollatorWithPadding(tokenizer)
data_collator = (
default_data_collator
if data_args.pad_to_max_length
else DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)
)

# Post-processing:
def post_processing_function(examples, features, predictions):
Expand Down
6 changes: 5 additions & 1 deletion examples/question-answering/run_qa_beam_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,11 @@ def prepare_validation_features(examples):
# Data collator
# We have already padded to max length if the corresponding flag is True, otherwise we need to pad in the data
# collator.
data_collator = default_data_collator if data_args.pad_to_max_length else DataCollatorWithPadding(tokenizer)
data_collator = (
default_data_collator
if data_args.pad_to_max_length
else DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)
)

# Post-processing:
def post_processing_function(examples, features, predictions):
Expand Down
6 changes: 5 additions & 1 deletion examples/seq2seq/run_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,11 @@ def preprocess_function(examples):
if data_args.pad_to_max_length:
data_collator = default_data_collator
else:
data_collator = DataCollatorForSeq2Seq(tokenizer, label_pad_token_id=label_pad_token_id)
data_collator = DataCollatorForSeq2Seq(
tokenizer,
label_pad_token_id=label_pad_token_id,
pad_to_multiple_of=8 if training_args.fp16 else None,
)

# Metric
metric_name = "rouge" if data_args.task.startswith("summarization") else "sacrebleu"
Expand Down
12 changes: 10 additions & 2 deletions examples/text-classification/run_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
AutoConfig,
AutoModelForSequenceClassification,
AutoTokenizer,
DataCollatorWithPadding,
EvalPrediction,
HfArgumentParser,
PretrainedConfig,
Expand Down Expand Up @@ -375,6 +376,14 @@ def compute_metrics(p: EvalPrediction):
else:
return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()}

# Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding.
if data_args.pad_to_max_length:
data_collator = default_data_collator
elif training_args.fp16:
data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8)
else:
data_collator = None

# Initialize our Trainer
trainer = Trainer(
model=model,
Expand All @@ -383,8 +392,7 @@ def compute_metrics(p: EvalPrediction):
eval_dataset=eval_dataset if training_args.do_eval else None,
compute_metrics=compute_metrics,
tokenizer=tokenizer,
# Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding.
data_collator=default_data_collator if data_args.pad_to_max_length else None,
data_collator=data_collator,
)

# Training
Expand Down
2 changes: 1 addition & 1 deletion examples/token-classification/run_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ def tokenize_and_align_labels(examples):
)

# Data collator
data_collator = DataCollatorForTokenClassification(tokenizer)
data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)

# Metrics
metric = load_metric("seqeval")
Expand Down
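3 changes: 2 additions & 1 deletion templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py
Original file line number Diff line number Diff line change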
Expand Up @@ -33,6 +33,7 @@
AutoConfig,
{{cookiecutter.model_class}},
AutoTokenizer,
DataCollatorWithPadding,
HfArgumentParser,
Trainer,
TrainingArguments,
Expand Down Expand Up @@ -323,7 +324,7 @@ def tokenize_function(examples):
)

# Data collator
data_collator=default_data_collator
data_collator=default_data_collator if not training_args.fp16 else DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8)

# Initialize our Trainer
trainer = Trainer(
Expand Down

0 comments on commit 10e5f28

Please sign in to comment.