Use main_process_first in the examples (#581)
muellerzr authored Jul 28, 2022
1 parent 5030571 commit 7f5c60c
Showing 9 changed files with 63 additions and 45 deletions.
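All nine examples receive the same change: the datasets.map call that tokenizes the dataset is wrapped in Accelerate's accelerator.main_process_first() context manager, so that in a multi-process launch the main process tokenizes and caches the dataset first, and the remaining processes load the cached result instead of each redoing the work. Below is a minimal sketch of the resulting pattern; the glue/mrpc dataset and bert-base-cased checkpoint are assumptions inferred from the column names in the diff, not something this commit specifies.

    from accelerate import Accelerator
    from datasets import load_dataset
    from transformers import AutoTokenizer

    accelerator = Accelerator()

    # Assumed inputs: the column names removed in the diff ("idx",
    # "sentence1", "sentence2") match GLUE/MRPC; the checkpoint name
    # is illustrative.
    datasets = load_dataset("glue", "mrpc")
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")


    def tokenize_function(examples):
        # Tokenize both sentences of each pair; padding is left to the collator.
        return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)


    # The main process enters the block and runs `map` first, writing the
    # result to the datasets cache; the other processes wait at the context
    # manager, then run the same `map` and read from the cache instead of
    # re-tokenizing.
    with accelerator.main_process_first():
        tokenized_datasets = datasets.map(
            tokenize_function,
            batched=True,
            remove_columns=["idx", "sentence1", "sentence2"],
        )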
12 changes: 7 additions & 5 deletions examples/by_feature/checkpointing.py
@@ -72,11 +72,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
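The remaining eight files below apply the identical wrapper. Conceptually, main_process_first behaves like a single collective barrier placed around the block; the sketch below is an illustrative reimplementation of that idea, not Accelerate's actual code (is_main_process and wait_for_everyone stand in for Accelerate's process check and its collective barrier):

    from contextlib import contextmanager


    @contextmanager
    def main_process_first(is_main_process, wait_for_everyone):
        # Illustrative only. Non-main processes block on entry while the
        # main process runs the body; once the main process reaches its
        # exit barrier, everyone is released and the non-main processes
        # execute the body in turn.
        if not is_main_process:
            wait_for_everyone()
        yield
        if is_main_process:
            wait_for_everyone()

Used this way, work that populates a shared cache (such as datasets.map) effectively runs once on the main process and is reused everywhere else.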
12 changes: 7 additions & 5 deletions examples/by_feature/cross_validation.py
@@ -92,11 +92,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
12 changes: 7 additions & 5 deletions examples/by_feature/fsdp_with_peak_mem_tracking.py
@@ -127,11 +127,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
12 changes: 7 additions & 5 deletions examples/by_feature/gradient_accumulation.py
@@ -67,11 +67,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
12 changes: 7 additions & 5 deletions examples/by_feature/memory.py
@@ -72,11 +72,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
12 changes: 7 additions & 5 deletions examples/by_feature/multi_process_metrics.py
@@ -74,11 +74,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
12 changes: 7 additions & 5 deletions examples/by_feature/tracking.py
@@ -72,11 +72,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
12 changes: 7 additions & 5 deletions examples/complete_nlp_example.py
@@ -91,11 +91,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
12 changes: 7 additions & 5 deletions examples/nlp_example.py
@@ -65,11 +65,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
