Add Trainer.validate(…) method to run one validation epoch #4707

Closed

39 commits
62bd29e
Add Trainer.validate(…) to run one validation epoch
EliaCereda Nov 17, 2020
055e1ba
Support val_progress_bar without main_progress_bar in ProgressBar
EliaCereda Nov 17, 2020
156b669
Fix PEP 8 issue
EliaCereda Nov 17, 2020
1429548
Use `main_progress_bar is not None` to test if the bar is present in …
EliaCereda Nov 17, 2020
50427e7
Simplify selection of dataloaders arg to be set
EliaCereda Nov 17, 2020
d1988e0
Call setup(…) with stage ‘validation’ when running Trainer.validate(…)
EliaCereda Nov 17, 2020
ae03c6b
Check self.trainer.evaluating instead of self.trainer.testing in Acce…
EliaCereda Nov 17, 2020
5493a5b
Set Trainer.evaluating to None by default
EliaCereda Nov 17, 2020
860fef5
Replace the remaining instances of self.evaluating = False with None
EliaCereda Nov 18, 2020
99a6161
Add a first batch of tests for Trainer.validate(…)
EliaCereda Nov 18, 2020
307c89a
Avoid an if/else in ProgressBar
EliaCereda Nov 18, 2020
9e59e6d
Modify ModelCheckpoint to never save a checkpoint automatically when …
EliaCereda Nov 18, 2020
a844f40
Update test_config_validator.py to match the messages of expected err…
EliaCereda Nov 18, 2020
3f9f927
Fix Trainer.validate(…, verbose=True)
EliaCereda Nov 19, 2020
db22f2b
Transform Trainer.testing to a read-only deprecated property, remove …
EliaCereda Nov 19, 2020
f8647c5
Update docs for Trainer.validate and Trainer.test
EliaCereda Nov 19, 2020
99281a0
Remove usages of deprecated Trainer.testing
EliaCereda Nov 20, 2020
58d1c36
Rename methods and attributes to reflect their new behavior
EliaCereda Nov 20, 2020
7330ad4
Rename Trainer.tested_ckpt_path to Trainer.evaluated_ckpt_path since …
EliaCereda Nov 20, 2020
7abc67d
Update CHANGELOG.md
EliaCereda Nov 20, 2020
14799da
Fix PEP 8 issues
EliaCereda Nov 20, 2020
f8f4d3b
Update documentation of .setup(stage) methods to mention the new ‘val…
EliaCereda Nov 20, 2020
1818f22
Added more tests for Trainer.validate
EliaCereda Nov 20, 2020
ab89faa
Merge remote-tracking branch 'upstream/master' into feature/trainer-v…
EliaCereda Nov 20, 2020
d0cd34a
Fix hook that tracks LightningDataModule.setup(‘validation’) calls, a…
EliaCereda Nov 20, 2020
0209cfc
Add a test for Trainer.validate on DataParallel
EliaCereda Nov 20, 2020
6a04280
Disable EarlyStopping in evaluation mode
EliaCereda Nov 21, 2020
2115350
Clean up LoggerConnector.get_evaluate_epoch_results
EliaCereda Nov 21, 2020
92acb12
Improve description of Trainer.validate in docs/source/trainer.rst
EliaCereda Nov 21, 2020
8090193
Clean up setup() methods in tests/base/datamodules.py
EliaCereda Nov 21, 2020
a098489
Update deprecation warnings
EliaCereda Nov 21, 2020
f8ab391
Update Trainer.{validate, test} docstrings
EliaCereda Nov 21, 2020
605e7b0
Fix PEP 8 issue
EliaCereda Nov 21, 2020
14a7767
Consistently use the serial comma in docstrings
EliaCereda Nov 23, 2020
e9a6956
Merge remote-tracking branch 'upstream/master' into feature/trainer-v…
EliaCereda Nov 23, 2020
873099e
Merge remote-tracking branch 'upstream/master' into feature/trainer-v…
EliaCereda Nov 25, 2020
6f2ce28
Fix PEP 8 issue
EliaCereda Nov 25, 2020
0f4e474
Merge remote-tracking branch 'upstream/master' into feature/trainer-v…
EliaCereda Dec 2, 2020
d4cb1b0
Rewrite assertions for Trainer.validate in test_callbacks.py using Ma…
EliaCereda Dec 2, 2020
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -52,6 +52,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added `F1` class metric ([#4656](https://github.com/PyTorchLightning/pytorch-lightning/pull/4656))


- Added `Trainer.validate()` method to perform one evaluation epoch over the validation set ([#4707](https://github.com/PyTorchLightning/pytorch-lightning/pull/4707))


### Changed

- Consistently use `step=trainer.global_step` in `LearningRateMonitor` independently of `logging_interval` ([#4376](https://github.com/PyTorchLightning/pytorch-lightning/pull/4376))
15 changes: 14 additions & 1 deletion docs/source/trainer.rst
@@ -148,14 +148,27 @@ So you can run it like so:

------------

Validation
----------
You can perform an evaluation epoch over the validation set, outside of the training loop,
using :meth:`pytorch_lightning.trainer.trainer.Trainer.validate`. This can be
useful if you want to collect new metrics from a model either right at its
initialization or after it has already been trained.

.. code-block:: python

trainer.validate(val_dataloaders=val_dataloaders)
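As a fuller illustration, here is a sketch of how this might look end to end (the model and dataloader names are placeholders, and the signature is assumed to mirror ``Trainer.test``):

.. code-block:: python

    model = LitModel()  # any LightningModule; the name is illustrative
    trainer = Trainer()

    # collect metrics from the freshly initialized model
    trainer.validate(model, val_dataloaders=val_dataloader)

    # ... or run validation again after training
    trainer.fit(model, train_dataloader, val_dataloader)
    trainer.validate(val_dataloaders=val_dataloader)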

------------

Testing
-------
Once you're done training, feel free to run the test set!
(Only right before publishing your paper or pushing to production)

.. code-block:: python

trainer.test(test_dataloader=test_dataloader)
trainer.test(test_dataloaders=test_dataloaders)

------------

8 changes: 4 additions & 4 deletions pytorch_lightning/accelerators/accelerator.py
@@ -59,9 +59,9 @@ def barrier(self, name: Optional[str] = None):
def broadcast(self, obj, src=0):
return obj

def train_or_test(self):
if self.trainer.testing:
results = self.trainer.run_test()
def train_or_evaluate(self):
Member:
The name here is a bit misleading as this also runs test

Contributor Author:
I used evaluate here to refer to either test or validate. I think it was inspired by the pre-existing Trainer.run_evaluation method, which is used to run either the test or validation loop depending on the value of the test_mode parameter.

Let me know if you have a better idea for the name!

if self.trainer.evaluating:
results = self.trainer.run_test_or_validate()
else:
results = self.trainer.train()
return results
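
To summarize the semantics that emerge from the commits and this hunk (an inference from the diffs, not text taken from the PR):

    # Inferred semantics of the new Trainer.evaluating attribute:
    #   None          -> the user called Trainer.fit()      (not evaluating)
    #   'validation'  -> the user called Trainer.validate()
    #   'test'        -> the user called Trainer.test()
    # Any truthy value routes train_or_evaluate() into run_test_or_validate();
    # the specific string selects the loop and labels the printed results, as
    # in the logger_connector hunk further down:
    stage = trainer.evaluating  # e.g. 'validation'
    if stage:
        print(f'DATALOADER:0 {stage.upper()} RESULTS')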
@@ -160,7 +160,7 @@ def early_stopping_should_stop(self, pl_module):
return self.trainer.should_stop

def setup_optimizers(self, model):
if self.trainer.testing is True:
if self.trainer.evaluating:
return

optimizers, lr_schedulers, optimizer_frequencies = self.trainer.init_optimizers(model)
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/cpu_accelerator.py
@@ -55,8 +55,8 @@ def train(self):
# set up training routine
self.trainer.train_loop.setup_training(model)

# train or test
results = self.train_or_test()
# train or evaluate
results = self.train_or_evaluate()
return results

def training_step(self, args):
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/ddp2_accelerator.py
@@ -183,8 +183,8 @@ def ddp_train(self, process_idx, mp_queue, model):
# set up training routine
self.trainer.train_loop.setup_training(model)

# train or test
results = self.train_or_test()
# train or evaluate
results = self.train_or_evaluate()

# clean up memory
torch.cuda.empty_cache()
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/ddp_accelerator.py
@@ -280,8 +280,8 @@ def ddp_train(self, process_idx, model):
self.barrier('ddp_setup')
self.trainer.train_loop.setup_training(model)

# train or test
results = self.train_or_test()
# train or evaluate
results = self.train_or_evaluate()

# clean up memory
torch.cuda.empty_cache()
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/ddp_cpu_spawn_accelerator.py
@@ -143,8 +143,8 @@ def ddp_train(self, process_idx, mp_queue, model):
# set up training routine
self.trainer.train_loop.setup_training(model)

# train or test
results = self.train_or_test()
# train or evaluate
results = self.train_or_evaluate()

# get original model
model = self.trainer.get_model()
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/ddp_hpc_accelerator.py
@@ -177,8 +177,8 @@ def ddp_train(self, process_idx, model):
# set up training routine
self.trainer.train_loop.setup_training(model)

# train or test
results = self.train_or_test()
# train or evaluate
results = self.train_or_evaluate()

# clean up memory
torch.cuda.empty_cache()
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/ddp_spawn_accelerator.py
@@ -154,8 +154,8 @@ def ddp_train(self, process_idx, mp_queue, model, is_master=False, proc_offset=0
# set up training routine
self.trainer.train_loop.setup_training(model)

# train or test
results = self.train_or_test()
# train or evaluate
results = self.train_or_evaluate()

# get original model
model = self.trainer.get_model()
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/dp_accelerator.py
@@ -104,8 +104,8 @@ def train(self):
# set up training routine
self.trainer.train_loop.setup_training(model)

# train or test
results = self.train_or_test()
# train or evaluate
results = self.train_or_evaluate()

return results

5 changes: 3 additions & 2 deletions pytorch_lightning/accelerators/gpu_accelerator.py
@@ -60,8 +60,9 @@ def train(self):
# set up training routine
self.trainer.train_loop.setup_training(model)

# train or test
results = self.train_or_test()
# train or evaluate
results = self.train_or_evaluate()

return results

def training_step(self, args):
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/horovod_accelerator.py
@@ -109,8 +109,8 @@ def train(self):
# set up training routine
self.trainer.train_loop.setup_training(self.trainer.model)

# train or test
results = self.train_or_test()
# train or evaluate
results = self.train_or_evaluate()

# Make sure all workers have finished training before returning to the user
hvd.join()
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/tpu_accelerator.py
@@ -133,8 +133,8 @@ def tpu_train_in_process(self, tpu_core_idx: int, model: LightningModule, traine
# set up training routine
self.trainer.train_loop.setup_training(model)

# train or test
results = self.train_or_test()
# train or evaluate
results = self.train_or_evaluate()

# save weights at the end of training
self.__save_end_of_training_weights(model, trainer)
4 changes: 2 additions & 2 deletions pytorch_lightning/callbacks/base.py
@@ -28,11 +28,11 @@ class Callback(abc.ABC):
"""

def setup(self, trainer, pl_module, stage: str):
"""Called when fit or test begins"""
"""Called when fit, validate, or test begins"""
pass

def teardown(self, trainer, pl_module, stage: str):
"""Called when fit or test ends"""
"""Called when fit, validate, or test ends"""
pass

def on_init_start(self, trainer):
4 changes: 2 additions & 2 deletions pytorch_lightning/callbacks/early_stopping.py
@@ -138,13 +138,13 @@ def on_load_checkpoint(self, checkpointed_state):
self.patience = checkpointed_state['patience']

def on_validation_end(self, trainer, pl_module):
if trainer.running_sanity_check:
if trainer.running_sanity_check or trainer.evaluating:
return

self._run_early_stopping_check(trainer, pl_module)

def on_validation_epoch_end(self, trainer, pl_module):
if trainer.running_sanity_check:
if trainer.running_sanity_check or trainer.evaluating:
return

if self._validate_condition_metric(trainer.logger_connector.callback_metrics):
1 change: 1 addition & 0 deletions pytorch_lightning/callbacks/model_checkpoint.py
@@ -220,6 +220,7 @@ def save_checkpoint(self, trainer, pl_module):
or self.period < 1 # no models are saved
or (epoch + 1) % self.period # skip epoch
or trainer.running_sanity_check # don't save anything during sanity check
or trainer.evaluating # don't save anything during evaluation: might delete the checkpoint being evaluated
or self.last_global_step_saved == global_step # already saved at the last step
):
return
22 changes: 18 additions & 4 deletions pytorch_lightning/callbacks/progress.py
@@ -282,9 +282,13 @@ def init_train_tqdm(self) -> tqdm:

def init_validation_tqdm(self) -> tqdm:
""" Override this to customize the tqdm bar for validation. """

# The main progress bar doesn't exist in trainer.validate(...)
has_main_bar = int(self.main_progress_bar is not None)

bar = tqdm(
desc='Validating',
position=(2 * self.process_position + 1),
position=(2 * self.process_position + has_main_bar),
disable=self.is_disabled,
leave=False,
dynamic_ncols=True,
@@ -341,19 +345,29 @@ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, data
def on_validation_start(self, trainer, pl_module):
super().on_validation_start(trainer, pl_module)
if not trainer.running_sanity_check:
self._update_bar(self.main_progress_bar) # fill up remaining
# The main progress bar doesn't exist in trainer.validate(...)
if self.main_progress_bar is not None:
self._update_bar(self.main_progress_bar) # fill up remaining

self.val_progress_bar = self.init_validation_tqdm()
self.val_progress_bar.total = convert_inf(self.total_val_batches)

def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
super().on_validation_batch_end(trainer, pl_module, outputs, batch, batch_idx, dataloader_idx)
if self._should_update(self.val_batch_idx, self.total_val_batches):
self._update_bar(self.val_progress_bar)
self._update_bar(self.main_progress_bar)

# The main progress bar doesn't exist in trainer.validate(...)
if self.main_progress_bar is not None:
self._update_bar(self.main_progress_bar)

def on_validation_end(self, trainer, pl_module):
super().on_validation_end(trainer, pl_module)
self.main_progress_bar.set_postfix(trainer.progress_bar_dict)

# The main progress bar doesn't exist in trainer.validate(...)
if self.main_progress_bar is not None:
self.main_progress_bar.set_postfix(trainer.progress_bar_dict)

self.val_progress_bar.close()

def on_train_end(self, trainer, pl_module):
15 changes: 14 additions & 1 deletion pytorch_lightning/core/datamodule.py
@@ -76,13 +76,16 @@ def wrapped_fn(*args, **kwargs):
if fn.__name__ == "setup":

# Get stage either by grabbing from args or checking kwargs.
# If not provided, set call status of 'fit' and 'test' to True.
# If not provided, set call status of 'fit', 'validation', and 'test' to True.
# We do this so __attach_datamodule in trainer.py doesn't mistakenly call setup('test') on trainer.test()
stage = args[1] if len(args) > 1 else kwargs.get("stage", None)

if stage == "fit" or stage is None:
obj._has_setup_fit = True

if stage == "validation" or stage is None:
Contributor:
I would make all references in datamodule "validate" (instead of "validation") to keep it consistent with fit and test

Contributor:
fit and test can be nouns or verbs; however, we are talking about stages, which means they should be nouns.
So if I am not mistaken (English is not my first language), using validation is more consistent:
the validation stage vs. the validate stage

Contributor:
I don't think grammar should be a consideration here since we're only talking about variables in code... and that variable name consistency is more important. Thoughts on this? @justusschock @rohitgr7

Member:
I think both of you have a point here, and one could certainly use both. However, I feel that validation stage is more intuitive, and personally I would go with it since it sounds 'more correct' to me, but this is just a personal opinion. Also, I think this should definitely not be a blocker here.

Contributor Author:
Yes, it is a good point. I was also ambivalent about it while I was writing the code.

There is another occurrence of this issue: the Trainer.evaluating attribute, which can be either test or validation. Here validation is the right choice in my opinion, reading it as "currently evaluating over the test/validation set".

It was not so clear cut in the data module: I'd say that 'validation' sounds better to me too, but I would not be opposed to using 'validate' either.

Contributor:
Gotcha, it does have a better ring to it :]

obj._has_setup_validation = True

if stage == "test" or stage is None:
obj._has_setup_test = True

@@ -155,6 +158,7 @@ def __init__(
# Private attrs to keep track of whether or not data hooks have been called yet
self._has_prepared_data = False
self._has_setup_fit = False
self._has_setup_validation = False
self._has_setup_test = False

@property
@@ -230,6 +234,15 @@ def has_setup_fit(self):
"""
return self._has_setup_fit

@property
def has_setup_validation(self):
"""Return bool letting you know if datamodule.setup('validation') has been called or not.

Returns:
bool: True if datamodule.setup('validation') has been called. False by default.
"""
return self._has_setup_validation

@property
def has_setup_test(self):
"""Return bool letting you know if datamodule.setup('test') has been called or not.
8 changes: 4 additions & 4 deletions pytorch_lightning/core/hooks.py
@@ -33,12 +33,12 @@ class ModelHooks:
"""Hooks to be used in LightningModule."""
def setup(self, stage: str):
"""
Called at the beginning of fit and test.
Called at the beginning of fit (training + validation), validation, and test.
This is a good hook when you need to build models dynamically or adjust something about them.
This hook is called on every process when using DDP.

Args:
stage: either 'fit' or 'test'
stage: either 'fit', 'validation', or 'test'

Example::

@@ -61,10 +61,10 @@ def setup(stage):

def teardown(self, stage: str):
"""
Called at the end of fit and test.
Called at the end of fit (training + validation), validation, and test.

Args:
stage: either 'fit' or 'test'
stage: either 'fit', 'validation', or 'test'
"""

def on_fit_start(self):
6 changes: 3 additions & 3 deletions pytorch_lightning/trainer/configuration_validator.py
@@ -31,12 +31,12 @@ def verify_loop_configurations(self, model: LightningModule):
model: The model to check the configuration.

"""
if not self.trainer.testing:
if not self.trainer.evaluating:
self.__verify_train_loop_configuration(model)
self.__verify_eval_loop_configuration(model, 'validation')
else:
# check test loop configuration
self.__verify_eval_loop_configuration(model, 'test')
# check evaluation loop configurations
self.__verify_eval_loop_configuration(model, self.trainer.evaluating)

def __verify_train_loop_configuration(self, model):
# -----------------------------------
@@ -260,7 +260,7 @@ def prepare_eval_loop_results(self):
for dl_idx in range(self.trainer.evaluation_loop.num_dataloaders):
self.add_to_eval_loop_results(dl_idx, has_been_initialized)

def get_evaluate_epoch_results(self, test_mode):
def get_evaluate_epoch_results(self):
if not self.trainer.running_sanity_check:
# log all the metrics as a single dict
metrics_to_log = self.cached_results.get_epoch_log_metrics()
@@ -269,11 +269,11 @@ def get_evaluate_epoch_results(self):

self.prepare_eval_loop_results()

# log results of test
if test_mode and self.trainer.is_global_zero and self.trainer.verbose_test:
# log results of evaluation
if self.trainer.evaluating and self.trainer.is_global_zero and self.trainer.verbose_evaluate:
print('-' * 80)
for result_idx, results in enumerate(self.eval_loop_results):
print(f'DATALOADER:{result_idx} TEST RESULTS')
print(f'DATALOADER:{result_idx} {self.trainer.evaluating.upper()} RESULTS')
pprint(results)
print('-' * 80)

5 changes: 4 additions & 1 deletion pytorch_lightning/trainer/connectors/model_connector.py
@@ -36,7 +36,10 @@ def copy_trainer_model_properties(self, model):
m.use_ddp2 = self.trainer.use_ddp2
m.use_ddp = self.trainer.use_ddp
m.use_amp = self.trainer.amp_backend is not None
m.testing = self.trainer.testing
# TODO: I only find usages of m.testing in DDP, where it's used to
# discriminate test from validation, as opposed to test from fit in
# Trainer. Still need to fully determine if it's correct.
m.testing = self.trainer.evaluating == 'test'
Contributor Author:
It seems to me that m.testing is interpreted a bit differently than trainer.testing:

  • The latter (trainer.testing) is used in most of the code to discriminate whether the top-level function the user called was fit() or test(). As such, in the rest of the code I replaced if trainer.testing with if trainer.evaluating, i.e. validate() and test() take the same code path
  • The former (m.testing) is used by DP and DDP to discriminate whether the model is currently inside the validation loop or the test loop (e.g. here)

If this interpretation is correct, the code should be good as is. If not, it needs to be changed.
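
A sketch of that distinction (illustrative only — it paraphrases how the DP/DDP wrappers of this era picked the step method, and is not code from this PR):

    # model.testing selects which loop body the distributed wrapper runs,
    # while trainer.evaluating records which top-level entry point was called.
    if model.testing:
        output = model.test_step(*args, **kwargs)        # inside the test loop
    else:
        output = model.validation_step(*args, **kwargs)  # inside the validation loop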

m.use_single_gpu = self.trainer.use_single_gpu
m.use_tpu = self.trainer.use_tpu
m.tpu_local_core_rank = self.trainer.tpu_local_core_rank