diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py
index b17ea387d54..552c8392c08 100644
--- a/src/accelerate/accelerator.py
+++ b/src/accelerate/accelerator.py
@@ -747,6 +747,12 @@ def _prepare_deepspeed(self, *args):
         batch_size_per_device = deepspeed_plugin.deepspeed_config["train_micro_batch_size_per_gpu"]
         result = [obj for obj in args]
+        if self.gradient_accumulation_steps != deepspeed_plugin.deepspeed_config["gradient_accumulation_steps"]:
+            logger.info(
+                f"Updating DeepSpeed's gradient accumulation steps to {self.gradient_accumulation_steps} from "
+                f"{deepspeed_plugin.deepspeed_config['gradient_accumulation_steps']}."
+            )
+            deepspeed_plugin.deepspeed_config["gradient_accumulation_steps"] = self.gradient_accumulation_steps
         config_kwargs = {
             "train_micro_batch_size_per_gpu": batch_size_per_device,
             "train_batch_size": batch_size_per_device
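
For context, here is a minimal standalone sketch of the sync behavior this hunk introduces: the `gradient_accumulation_steps` value held by the `Accelerator` takes precedence over whatever the DeepSpeed config already carries, and the override is logged. The function name and the bare dict below are hypothetical illustrations, not part of `accelerate`'s API.

```python
import logging

logger = logging.getLogger(__name__)


def sync_gradient_accumulation_steps(accelerator_steps: int, deepspeed_config: dict) -> dict:
    """Mirror of the patch above: the Accelerator's value overrides the
    DeepSpeed config's gradient_accumulation_steps when they disagree."""
    if accelerator_steps != deepspeed_config["gradient_accumulation_steps"]:
        logger.info(
            f"Updating DeepSpeed's gradient accumulation steps to {accelerator_steps} from "
            f"{deepspeed_config['gradient_accumulation_steps']}."
        )
        deepspeed_config["gradient_accumulation_steps"] = accelerator_steps
    return deepspeed_config


# Example: a config created with 1 step is overridden by the Accelerator's value of 4.
config = {"gradient_accumulation_steps": 1}
print(sync_gradient_accumulation_steps(4, config))  # {'gradient_accumulation_steps': 4}
```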