fixing TensorBoard #687

Merged · 9 commits · Jan 16, 2020 · changes from 6 commits shown

3 changes: 2 additions & 1 deletion pytorch_lightning/core/hooks.py
@@ -1,5 +1,6 @@
"""
# Hooks
Hooks
=====

There are cases when you might want to do something different at different parts of the training/validation loop.
To enable a hook, simply override the method in your LightningModule and the trainer will call it at the correct time.
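
For instance, a minimal sketch (not part of this diff, assuming the usual top-level import and that `on_epoch_end` is one of the provided hooks):

import pytorch_lightning as pl


class CoolModel(pl.LightningModule):
    # training_step, configure_optimizers, dataloaders, etc. omitted for brevity

    def on_epoch_end(self):
        # the Trainer invokes this automatically at the end of every epoch
        print('finished an epoch')
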
15 changes: 8 additions & 7 deletions pytorch_lightning/core/lightning.py
@@ -135,7 +135,7 @@ def training_step(self, batch, batch_idx):

logger_logs = {'training_loss': loss} # optional (MUST ALL BE TENSORS)

# if using TestTubeLogger or TensorboardLogger you can nest scalars
# if using TestTubeLogger or TensorBoardLogger you can nest scalars
logger_logs = {'losses': logger_logs} # optional (MUST ALL BE TENSORS)

output = {
@@ -665,9 +665,10 @@ def configure_optimizers(self):

.. note:: If you use multiple optimizers, training_step will have an additional `optimizer_idx` parameter.

.. note:: If you use LBFGS lightning handles the closure function automatically for you.

.. note:: If you use multiple optimizers, gradients will be calculated only for the parameters of current optimizer at each training step.
.. note:: If you use LBFGS lightning handles the closure function automatically for you

.. note:: If you use multiple optimizers, gradients will be calculated only
for the parameters of current optimizer at each training step.

Example
-------
@@ -939,9 +940,9 @@ def load_from_metrics(cls, weights_path, tags_csv, map_location=None):
for mapping storage {'cuda:1':'cuda:0'}
:return: The pretrained LightningModule

If you're using test tube, there is an alternate method which uses the meta_tags.csv
file from test-tube to rebuild the model. The meta_tags.csv file can be found in the
test-tube experiment save_dir.
If you're using `test-tube`, there is an alternate method which uses the meta_tags.csv
file from test-tube to rebuild the model. The `meta_tags.csv` file can be found in the
`test-tube` experiment save_dir.

.. code-block:: python

2 changes: 1 addition & 1 deletion pytorch_lightning/logging/__init__.py
@@ -166,8 +166,8 @@ def __init__(self, hparams):
"""

from os import environ
from .base import LightningLoggerBase, rank_zero_only

from .base import LightningLoggerBase, rank_zero_only
from .tensorboard import TensorBoardLogger

try:
39 changes: 28 additions & 11 deletions pytorch_lightning/logging/tensorboard.py
@@ -1,8 +1,10 @@
import os
from warnings import warn
from argparse import Namespace
from pkg_resources import parse_version

import torch
from pkg_resources import parse_version
import pandas as pd
from torch.utils.tensorboard import SummaryWriter

from .base import LightningLoggerBase, rank_zero_only
@@ -28,8 +30,8 @@ class TensorBoardLogger(LightningLoggerBase):
directory for existing versions, then automatically assigns the next available version.
:param \**kwargs: Other arguments are passed directly to the :class:`SummaryWriter` constructor.


"""
NAME_CSV_TAGS = 'meta_tags.csv'

def __init__(self, save_dir, name="default", version=None, **kwargs):
super().__init__()
@@ -38,6 +40,7 @@ def __init__(self, save_dir, name="default", version=None, **kwargs):
self._version = version

self._experiment = None
self.tags = {}
self.kwargs = kwargs

@property
@@ -57,22 +60,25 @@ def experiment(self):

@rank_zero_only
def log_hyperparams(self, params):
if params is None:
return

# in case converting from namespace
if isinstance(params, Namespace):
params = vars(params)
params = dict(params)

if parse_version(torch.__version__) < parse_version("1.3.0"):
warn(
f"Hyperparameter logging is not available for Torch version {torch.__version__}."
" Skipping log_hyperparams. Upgrade to Torch 1.3.0 or above to enable"
" hyperparameter logging."
)
# TODO: some alternative should be added
return
try:
# in case converting from namespace, todo: rather test if it is namespace
params = vars(params)
except TypeError:
pass
if params is not None:
else:
# `add_hparams` requires both - hparams and metric
self.experiment.add_hparams(hparam_dict=dict(params), metric_dict={})
self.experiment.add_hparams(hparam_dict=params, metric_dict={})
# some alternative should be added
self.tags.update(params)

@rank_zero_only
def log_metrics(self, metrics, step=None):
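
Read together, the added lines above suggest the updated `log_hyperparams` reduces to roughly the following (a sketch assembled from the diff, relying on the imports shown at the top of the file; the verbatim merged method may differ slightly):

@rank_zero_only
def log_hyperparams(self, params):
    if params is None:
        return

    # in case converting from namespace
    if isinstance(params, Namespace):
        params = vars(params)
    params = dict(params)

    if parse_version(torch.__version__) < parse_version("1.3.0"):
        warn(
            f"Hyperparameter logging is not available for Torch version {torch.__version__}."
            " Skipping log_hyperparams. Upgrade to Torch 1.3.0 or above to enable"
            " hyperparameter logging."
        )
    else:
        # `add_hparams` requires both - hparams and metric
        self.experiment.add_hparams(hparam_dict=params, metric_dict={})
    # keep the params around so save() can write them to meta_tags.csv
    self.tags.update(params)
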
@@ -89,6 +95,17 @@ def save(self):
# you are using PT version (<v1.2) which does not have implemented flush
self.experiment._get_file_writer().flush()

# create a pseudo standard path ala test-tube
dir_path = os.path.join(self.save_dir, self.name, 'version_%s' % self.version)
if not os.path.isdir(dir_path):
dir_path = self.save_dir
# prepare the file path
meta_tags_path = os.path.join(dir_path, self.NAME_CSV_TAGS)
# save the metatags file
df = pd.DataFrame({'key': list(self.tags.keys()),
'value': list(self.tags.values())})
df.to_csv(meta_tags_path, index=False)

Review comments on this change:

Contributor: Shouldn't all this tag saving stuff go into the log_hyperparams function, not log_metrics?

Member Author: good question, what do you propose? @williamFalcon

Contributor: I think you can just move all this new code up into where the warning is in log_hyperparams.

Contributor: agreed with @neggert

Member Author: so you want to do a save always when a new parameter is added? it makes more sense for me to do it while saving...

Contributor: log_metrics gets called all the time. we shouldn’t save the csv over and over when that happens.

Member Author: so saving it in save() is fine? or am I missing something...

Contributor: i haven’t looked at this in a while. i would just put it where test tube does it

Member Author: test-tube does it while saving too, that's how I "copy-paste" it...
https://github.com/williamFalcon/test-tube/blob/master/test_tube/log.py#L366

Contributor: log_hyperparams is intended to be called once at the beginning of training. I think that's what we want.


@rank_zero_only
def finalize(self, status):
self.save()
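
To illustrate the behaviour the thread converges on (a usage sketch, not code from this PR): hyperparameters are only accumulated into `self.tags` when `log_hyperparams` is called, and `meta_tags.csv` is only written when `save()` or `finalize()` runs, so repeated `log_metrics` calls never rewrite the file.

from argparse import Namespace
from pytorch_lightning.logging import TensorBoardLogger

logger = TensorBoardLogger(save_dir='lightning_logs', name='example')

# typically called once at the start of training: tags are collected in memory
logger.log_hyperparams(Namespace(batch_size=32, hidden_dim=1000))

# metrics may be logged many times; this never touches meta_tags.csv
logger.log_metrics({'train_loss': 0.5}, step=0)

# save()/finalize() is where meta_tags.csv is actually written out
logger.save()
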
3 changes: 2 additions & 1 deletion pytorch_lightning/trainer/__init__.py
@@ -1,5 +1,6 @@
"""
# Trainer
Trainer
=======

The lightning trainer abstracts best practices for running a training, val, test routine.
It calls parts of your model when it wants to hand over full control and otherwise makes
4 changes: 2 additions & 2 deletions pytorch_lightning/trainer/callback_config.py
@@ -2,7 +2,7 @@
from abc import ABC

from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.logging import TensorboardLogger
from pytorch_lightning.logging import TensorBoardLogger


class TrainerCallbackConfigMixin(ABC):
@@ -69,7 +69,7 @@ def configure_early_stopping(self, early_stop_callback, logger):
# configure logger
if logger is True:
# default logger
self.logger = TensorboardLogger(
self.logger = TensorBoardLogger(
save_dir=self.default_save_path,
version=self.slurm_job_id,
name='lightning_logs'
3 changes: 2 additions & 1 deletion pytorch_lightning/trainer/evaluation_loop.py
@@ -1,5 +1,6 @@
"""
# Validation loop
Validation loop
===============

The lightning validation loop handles everything except the actual computations of your model.
To decide what will happen in your validation loop, define the `validation_step` function.
10 changes: 5 additions & 5 deletions pytorch_lightning/trainer/training_io.py
@@ -96,7 +96,9 @@
from subprocess import call
import logging
from abc import ABC
from argparse import Namespace

import pandas as pd
import torch
import torch.distributed as dist

@@ -268,7 +270,6 @@ def save_checkpoint(self, filepath):
torch.save(checkpoint, filepath)

def restore(self, checkpoint_path, on_gpu):

# if on_gpu:
# checkpoint = torch.load(checkpoint_path)
# else:
@@ -461,14 +462,13 @@ def max_ckpt_in_folder(self, path, name_key='ckpt_'):


def load_hparams_from_tags_csv(tags_csv):
from argparse import Namespace
import pandas as pd
if not os.path.isfile(tags_csv):
logging.warning(f'Missing Tags: {tags_csv}.')
return Namespace()

tags_df = pd.read_csv(tags_csv)
dic = tags_df.to_dict(orient='records')

ns_dict = {row['key']: convert(row['value']) for row in dic}

ns = Namespace(**ns_dict)
return ns
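
A minimal usage sketch of the helper above (the csv path below is hypothetical; in practice it lives under <save_dir>/<name>/version_<n>/):

from argparse import Namespace
from pytorch_lightning.trainer import training_io

hparams = training_io.load_hparams_from_tags_csv('lightning_logs/example/version_0/meta_tags.csv')
assert isinstance(hparams, Namespace)
# attributes mirror the keys stored in the csv, e.g. hparams.batch_size if it was logged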

2 changes: 1 addition & 1 deletion pytorch_lightning/trainer/training_loop.py
@@ -458,7 +458,7 @@ def run_training_batch(self, batch, batch_idx):

# call training_step once per optimizer
for opt_idx, optimizer in enumerate(self.optimizers):
# make sure only the gradients of the current optimizer's paramaters are calculated
# make sure only the gradients of the current optimizer's paramaters are calculated
# in the training step to prevent dangling gradients in multiple-optimizer setup.
for param in self.get_model().parameters():
param.requires_grad = False
2 changes: 1 addition & 1 deletion requirements.txt
@@ -4,5 +4,5 @@ numpy>=1.16.4
torch>=1.1
torchvision>=0.4.0
pandas>=0.24 # lower version do not support py3.7
test-tube>=0.7.5
tensorboard>=1.14
future>=0.17.1 # required for builtins in setup.py
2 changes: 1 addition & 1 deletion tests/requirements.txt
@@ -5,7 +5,7 @@ pytest>=3.0.5
pytest-cov
flake8
check-manifest
# test_tube # already installed in main req.
test-tube>=0.7.5
mlflow
comet_ml
wandb
6 changes: 3 additions & 3 deletions tests/test_cpu_models.py
@@ -29,7 +29,7 @@ def test_early_stopping_cpu_model(tmpdir):
show_progress_bar=True,
logger=tutils.get_test_tube_logger(tmpdir),
train_percent_check=0.1,
val_percent_check=0.1
val_percent_check=0.1,
)

model, hparams = tutils.get_model()
@@ -51,7 +51,7 @@ def test_lbfgs_cpu_model(tmpdir):
show_progress_bar=False,
weights_summary='top',
train_percent_check=1.0,
val_percent_check=0.2
val_percent_check=0.2,
)

model, hparams = tutils.get_model(use_test_model=True, lbfgs=True)
@@ -70,7 +70,7 @@ def test_default_logger_callbacks_cpu_model(tmpdir):
print_nan_grads=True,
show_progress_bar=False,
train_percent_check=0.01,
val_percent_check=0.01
val_percent_check=0.01,
)

model, hparams = tutils.get_model()
5 changes: 5 additions & 0 deletions tests/test_logging.py
@@ -192,6 +192,7 @@ def test_comet_pickle(tmpdir, monkeypatch):
trainer2 = pickle.loads(pkl_bytes)
trainer2.logger.log_metrics({"acc": 1.0})


def test_wandb_logger(tmpdir):
"""Verify that basic functionality of wandb logger works."""
tutils.reset_seed()
@@ -201,6 +202,7 @@ def test_wandb_logger(tmpdir):
wandb_dir = os.path.join(tmpdir, "wandb")
logger = WandbLogger(save_dir=wandb_dir, anonymous=True)


def test_neptune_logger(tmpdir):
"""Verify that basic functionality of neptune logger works."""
tutils.reset_seed()
@@ -223,13 +225,16 @@ def test_neptune_logger(tmpdir):
print('result finished')
assert result == 1, "Training failed"


def test_wandb_pickle(tmpdir):
"""Verify that pickling trainer with wandb logger works."""
tutils.reset_seed()

from pytorch_lightning.logging import WandbLogger
wandb_dir = str(tmpdir)
logger = WandbLogger(save_dir=wandb_dir, anonymous=True)
assert logger is not None


def test_neptune_pickle(tmpdir):
"""Verify that pickling trainer with neptune logger works."""
11 changes: 5 additions & 6 deletions tests/test_restore_models.py
@@ -41,12 +41,11 @@ def test_running_test_pretrained_model_ddp(tmpdir):
trainer = Trainer(**trainer_options)
result = trainer.fit(model)

exp = logger.experiment
logging.info(os.listdir(exp.get_data_path(exp.name, exp.version)))
logging.info(os.listdir(tutils.get_data_path(logger, path_dir=tmpdir)))

# correct result and ok accuracy
assert result == 1, 'training failed to complete'
pretrained_model = tutils.load_model(logger.experiment,
pretrained_model = tutils.load_model(logger,
trainer.checkpoint_callback.filepath,
module_class=LightningTestModel)

@@ -87,7 +86,7 @@ def test_running_test_pretrained_model(tmpdir):
# correct result and ok accuracy
assert result == 1, 'training failed to complete'
pretrained_model = tutils.load_model(
logger.experiment, trainer.checkpoint_callback.filepath, module_class=LightningTestModel
logger, trainer.checkpoint_callback.filepath, module_class=LightningTestModel
)

new_trainer = Trainer(**trainer_options)
@@ -171,7 +170,7 @@ def test_running_test_pretrained_model_dp(tmpdir):

# correct result and ok accuracy
assert result == 1, 'training failed to complete'
pretrained_model = tutils.load_model(logger.experiment,
pretrained_model = tutils.load_model(logger,
trainer.checkpoint_callback.filepath,
module_class=LightningTestModel)

@@ -361,7 +360,7 @@ def test_model_saving_loading(tmpdir):
trainer.save_checkpoint(new_weights_path)

# load new model
tags_path = logger.experiment.get_data_path(logger.experiment.name, logger.experiment.version)
tags_path = tutils.get_data_path(logger, path_dir=tmpdir)
tags_path = os.path.join(tags_path, 'meta_tags.csv')
model_2 = LightningTestModel.load_from_metrics(weights_path=new_weights_path,
tags_csv=tags_path)
9 changes: 4 additions & 5 deletions tests/test_trainer.py
@@ -51,7 +51,7 @@ class CurrentTestModel(LightningTestModelBase):
trainer.save_checkpoint(new_weights_path)

# load new model
tags_path = logger.experiment.get_data_path(logger.experiment.name, logger.experiment.version)
tags_path = tutils.get_data_path(logger, path_dir=tmpdir)
tags_path = os.path.join(tags_path, 'meta_tags.csv')
model_2 = LightningTestModel.load_from_metrics(weights_path=new_weights_path,
tags_csv=tags_path)
@@ -89,7 +89,7 @@ class CurrentTestModel(LightningValidationStepMixin, LightningTestModelBase):
trainer.save_checkpoint(new_weights_path)

# load new model
tags_path = logger.experiment.get_data_path(logger.experiment.name, logger.experiment.version)
tags_path = tutils.get_data_path(logger, path_dir=tmpdir)
tags_path = os.path.join(tags_path, 'meta_tags.csv')
model_2 = LightningTestModel.load_from_metrics(weights_path=new_weights_path,
tags_csv=tags_path)
@@ -184,9 +184,8 @@ def test_loading_meta_tags(tmpdir):
logger.save()

# load tags
tags_path = logger.experiment.get_data_path(
logger.experiment.name, logger.experiment.version
) + '/meta_tags.csv'
path_expt_dir = tutils.get_data_path(logger, path_dir=tmpdir)
tags_path = os.path.join(path_expt_dir, 'meta_tags.csv')
tags = training_io.load_hparams_from_tags_csv(tags_path)

assert tags.batch_size == 32 and tags.hidden_dim == 1000