From 26d274b45222e2d443aff788f8b478d684d3bc2d Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Thu, 28 Jul 2022 12:02:16 -0400
Subject: [PATCH] Use main_process_first

---
 examples/by_feature/checkpointing.py               | 12 +++++++-----
 examples/by_feature/cross_validation.py            | 12 +++++++-----
 examples/by_feature/fsdp_with_peak_mem_tracking.py | 12 +++++++-----
 examples/by_feature/gradient_accumulation.py       | 12 +++++++-----
 examples/by_feature/memory.py                      | 12 +++++++-----
 examples/by_feature/multi_process_metrics.py       | 12 +++++++-----
 examples/by_feature/tracking.py                    | 12 +++++++-----
 examples/complete_nlp_example.py                   | 12 +++++++-----
 examples/nlp_example.py                            | 12 +++++++-----
 9 files changed, 63 insertions(+), 45 deletions(-)

diff --git a/examples/by_feature/checkpointing.py b/examples/by_feature/checkpointing.py
index 5d31dd8f433..1b462a1f42f 100644
--- a/examples/by_feature/checkpointing.py
+++ b/examples/by_feature/checkpointing.py
@@ -72,11 +72,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
diff --git a/examples/by_feature/cross_validation.py b/examples/by_feature/cross_validation.py
index dc41b91e571..6dd61bbf812 100644
--- a/examples/by_feature/cross_validation.py
+++ b/examples/by_feature/cross_validation.py
@@ -92,11 +92,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
diff --git a/examples/by_feature/fsdp_with_peak_mem_tracking.py b/examples/by_feature/fsdp_with_peak_mem_tracking.py
index b95fb9908cd..a279e64eb67 100644
--- a/examples/by_feature/fsdp_with_peak_mem_tracking.py
+++ b/examples/by_feature/fsdp_with_peak_mem_tracking.py
@@ -127,11 +127,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
diff --git a/examples/by_feature/gradient_accumulation.py b/examples/by_feature/gradient_accumulation.py
index 580d07a57a0..69856c97b1c 100644
--- a/examples/by_feature/gradient_accumulation.py
+++ b/examples/by_feature/gradient_accumulation.py
@@ -67,11 +67,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
diff --git a/examples/by_feature/memory.py b/examples/by_feature/memory.py
index d85319552aa..91f3e41d156 100644
--- a/examples/by_feature/memory.py
+++ b/examples/by_feature/memory.py
@@ -72,11 +72,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
diff --git a/examples/by_feature/multi_process_metrics.py b/examples/by_feature/multi_process_metrics.py
index cf581c73d17..522cc571b5c 100644
--- a/examples/by_feature/multi_process_metrics.py
+++ b/examples/by_feature/multi_process_metrics.py
@@ -74,11 +74,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
diff --git a/examples/by_feature/tracking.py b/examples/by_feature/tracking.py
index 0da8c437ab9..d7248682dbf 100644
--- a/examples/by_feature/tracking.py
+++ b/examples/by_feature/tracking.py
@@ -72,11 +72,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
diff --git a/examples/complete_nlp_example.py b/examples/complete_nlp_example.py
index fc0fae90ba6..000da603804 100644
--- a/examples/complete_nlp_example.py
+++ b/examples/complete_nlp_example.py
@@ -91,11 +91,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library
diff --git a/examples/nlp_example.py b/examples/nlp_example.py
index 33c0ed7aa4c..b1e7cba270a 100644
--- a/examples/nlp_example.py
+++ b/examples/nlp_example.py
@@ -65,11 +65,13 @@ def tokenize_function(examples):
         return outputs
 
     # Apply the method we just defined to all the examples in all the splits of the dataset
-    tokenized_datasets = datasets.map(
-        tokenize_function,
-        batched=True,
-        remove_columns=["idx", "sentence1", "sentence2"],
-    )
+    # starting with the main process first:
+    with accelerator.main_process_first():
+        tokenized_datasets = datasets.map(
+            tokenize_function,
+            batched=True,
+            remove_columns=["idx", "sentence1", "sentence2"],
+        )
 
     # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
     # transformers library