huggingface · pacman100 · Jul 8, 2022 · Jul 7, 2022 · Jul 7, 2022 · Jul 7, 2022
diff --git a/src/accelerate/data_loader.py b/src/accelerate/data_loader.py
@@ -38,6 +38,22 @@
 if is_tpu_available(check_device=False):
     import torch_xla.distributed.parallel_loader as xpl
 
+    class MpDeviceLoaderWrapper(xpl.MpDeviceLoader):
+        """
+        Wrapper for the xpl.MpDeviceLoader class. This class is used to add a `total_batch_size` property to the
+        xpl.MpDeviceLoader class by delegating to `self._loader`.
+        """
+
+        @property
+        def total_batch_size(self):
+            """
+            Get the total batch size of the dataloader. It is the resulting batch size across processes. It is same as
+            the original batch size of the dataloader when `split_batches=True`. Otherwise, it is the product of the
+            original batch size of the dataloader and the number of processes.
+            """
+            return self._loader.total_batch_size
+
+
 logger = get_logger(__name__)
 
 # kwargs of the DataLoader in min version 1.4.0.
@@ -321,6 +337,19 @@ def __iter__(self):
                 yield current_batch
                 break
 
+    @property
+    def total_batch_size(self):
+        """
+        Get the total batch size of the dataloader, i.e. the resulting batch size across processes. It is the batch
+        sampler's batch size when `split_batches=True`, otherwise that batch size times the number of processes.
+        A batch sampler lacking `split_batches`/`num_processes` is treated as unsplit and single-process.
+        """
+        sampler = self.batch_sampler
+        # Fall back to defaults so a sampler without the sharding attributes does not raise AttributeError.
+        if getattr(sampler, "split_batches", False):
+            return sampler.batch_size
+        return sampler.batch_size * getattr(sampler, "num_processes", 1)
+
 
 class DataLoaderDispatcher(DataLoader):
     """
@@ -432,6 +461,17 @@ def __len__(self):
         else:
             return math.ceil(whole_length / self.state.num_processes)
 
+    @property
+    def total_batch_size(self):
+        """
+        Total batch size across all processes: the dataset's batch size when `split_batches` is set, otherwise the
+        dataset's batch size multiplied by the dataset's number of processes.
+        """
+        if self.split_batches:
+            return self.dataset.batch_size
+        dataset = self.dataset
+        return dataset.batch_size * dataset.num_processes
+
 
 def prepare_data_loader(
     dataloader: DataLoader,
@@ -577,7 +617,10 @@ def prepare_data_loader(
 
     if dispatch_batches:
         dataloader = DataLoaderDispatcher(
-            new_dataset, split_batches=split_batches, batch_sampler=new_batch_sampler, **kwargs
+            new_dataset,
+            split_batches=split_batches,
+            batch_sampler=new_batch_sampler,
+            **kwargs,
         )
     else:
         dataloader = DataLoaderShard(
@@ -590,5 +633,5 @@ def prepare_data_loader(
         )
 
     if state.distributed_type == DistributedType.TPU:
-        return xpl.MpDeviceLoader(dataloader, device)
+        return MpDeviceLoaderWrapper(dataloader, device)
     return dataloader