diff --git a/.circleci/config.yml b/.circleci/config.yml
index f4789a3181ea5..988a93ce93f4e 100755
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -16,14 +16,21 @@ references:
         sudo pip install pytest pytest-cov pytest-flake8
         pip install -r ./tests/requirements.txt --user

-  tests_format: &tests_format
+  tests: &tests
     run:
-      name: Tests and formating
+      name: Testing
       command: |
         python --version ; pip --version ; pip list
-        py.test pytorch_lightning tests pl_examples -v --doctest-modules --junitxml=test-reports/pytest_junit.xml --flake8
+        py.test pytorch_lightning tests pl_examples -v --doctest-modules --junitxml=test-reports/pytest_junit.xml
       no_output_timeout: 15m

+  format: &format
+    run:
+      name: Formatting
+      command: |
+        python --version ; pip --version ; pip list
+        flake8
+
   make_docs: &make_docs
     run:
       name: Make Documentation
@@ -43,6 +50,16 @@ jobs:
       - checkout
      - *make_docs

+  Formatting:
+    docker:
+      - image: circleci/python:3.7
+    environment:
+      - TORCH_VERSION: "torch"
+    steps:
+      - checkout
+      - *install_deps
+      - *format
+
   PyTorch:
     docker:
       - image: circleci/python:3.7
@@ -52,7 +69,7 @@ jobs:
     steps:
       - checkout
       - *install_deps
-      - *tests_format
+      - *tests
       - store_test_results:
           path: test-reports
@@ -80,11 +97,20 @@ jobs:
       - TORCH_VERSION: "torch>=1.3, <1.4"
     steps: *steps

+  PyTorch-v1.4:
+    docker:
+      - image: circleci/python:3.6
+    environment:
+      - TORCH_VERSION: "torch>=1.4, <1.5"
+    steps: *steps
+
 workflows:
   version: 2
   build:
     jobs:
+      - Formatting
       - Build-Docs
       - PyTorch-v1.1
       - PyTorch-v1.2
       - PyTorch-v1.3
+      - PyTorch-v1.4
diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py
index 5d9d9c6fd2ddd..dc5a502c5f89a 100644
--- a/pytorch_lightning/core/hooks.py
+++ b/pytorch_lightning/core/hooks.py
@@ -1,5 +1,6 @@
 """
-# Hooks
+Hooks
+=====
 There are cases when you might want to do something different at different parts of the training/validation loop.
 To enable a hook, simply override the method in your LightningModule and the trainer will call it at the correct time.
diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 22fff33367d34..3fca3968454ed 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -135,7 +135,7 @@ def training_step(self, batch, batch_idx):
                 logger_logs = {'training_loss': loss} # optional (MUST ALL BE TENSORS)

-                # if using TestTubeLogger or TensorboardLogger you can nest scalars
+                # if using TestTubeLogger or TensorBoardLogger you can nest scalars
                 logger_logs = {'losses': logger_logs} # optional (MUST ALL BE TENSORS)

                 output = {
@@ -665,9 +665,10 @@ def configure_optimizers(self):

         .. note:: If you use multiple optimizers, training_step will have an additional `optimizer_idx` parameter.

-        .. note:: If you use LBFGS lightning handles the closure function automatically for you.
-
-        .. note:: If you use multiple optimizers, gradients will be calculated only for the parameters of current optimizer at each training step.
+        .. note:: If you use LBFGS lightning handles the closure function automatically for you
+
+        .. note:: If you use multiple optimizers, gradients will be calculated only
+            for the parameters of current optimizer at each training step.

         Example
         -------
@@ -939,9 +940,9 @@ def load_from_metrics(cls, weights_path, tags_csv, map_location=None):
             for mapping storage {'cuda:1':'cuda:0'}
         :return: The pretrained LightningModule

-        If you're using test tube, there is an alternate method which uses the meta_tags.csv
-        file from test-tube to rebuild the model. The meta_tags.csv file can be found in the
-        test-tube experiment save_dir.
+        If you're using `test-tube`, there is an alternate method which uses the meta_tags.csv
+        file from test-tube to rebuild the model. The `meta_tags.csv` file can be found in the
+        `test-tube` experiment save_dir.

         .. code-block:: python
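Reviewer note on the `load_from_metrics` docstring touched above: a minimal sketch of the documented flow, using the `LightningTestModel` class that the tests in this PR load the same way. Both paths are placeholders for artifacts produced by an earlier training run.

```python
from pytorch_lightning.testing import LightningTestModel

# rebuild the model from saved weights plus the hyperparameter CSV;
# both paths below are hypothetical stand-ins for files from a prior run
pretrained_model = LightningTestModel.load_from_metrics(
    weights_path='/path/to/checkpoints/_ckpt_epoch_1.ckpt',
    tags_csv='/path/to/save_dir/default/version_0/meta_tags.csv',
    map_location={'cuda:1': 'cuda:0'},  # optional remap, mirrors the docstring
)
pretrained_model.eval()
pretrained_model.freeze()
```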
diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py
index 129568d03e7a5..9a588bacb87fd 100644
--- a/pytorch_lightning/logging/__init__.py
+++ b/pytorch_lightning/logging/__init__.py
@@ -166,8 +166,8 @@ def __init__(self, hparams):
 """

 from os import environ
-from .base import LightningLoggerBase, rank_zero_only

+from .base import LightningLoggerBase, rank_zero_only
 from .tensorboard import TensorBoardLogger

 try:
diff --git a/pytorch_lightning/logging/tensorboard.py b/pytorch_lightning/logging/tensorboard.py
index 26d626d1cd62c..85279ce728d7b 100644
--- a/pytorch_lightning/logging/tensorboard.py
+++ b/pytorch_lightning/logging/tensorboard.py
@@ -1,8 +1,10 @@
 import os
 from warnings import warn
+from argparse import Namespace
+from pkg_resources import parse_version

 import torch
-from pkg_resources import parse_version
+import pandas as pd
 from torch.utils.tensorboard import SummaryWriter

 from .base import LightningLoggerBase, rank_zero_only
@@ -28,8 +30,8 @@ class TensorBoardLogger(LightningLoggerBase):
         directory for existing versions, then automatically assigns the next available version.
     :param \**kwargs: Other arguments are passed directly to the :class:`SummaryWriter` constructor.
-
     """
+    NAME_CSV_TAGS = 'meta_tags.csv'

     def __init__(self, save_dir, name="default", version=None, **kwargs):
         super().__init__()
@@ -38,6 +40,7 @@ def __init__(self, save_dir, name="default", version=None, **kwargs):
         self._version = version

         self._experiment = None
+        self.tags = {}
         self.kwargs = kwargs

     @property
@@ -57,22 +60,25 @@ def experiment(self):

     @rank_zero_only
     def log_hyperparams(self, params):
+        if params is None:
+            return
+
+        # in case converting from namespace
+        if isinstance(params, Namespace):
+            params = vars(params)
+        params = dict(params)
+
         if parse_version(torch.__version__) < parse_version("1.3.0"):
             warn(
                 f"Hyperparameter logging is not available for Torch version {torch.__version__}."
                 " Skipping log_hyperparams. Upgrade to Torch 1.3.0 or above to enable"
                 " hyperparameter logging."
             )
-            # TODO: some alternative should be added
-            return
-        try:
-            # in case converting from namespace, todo: rather test if it is namespace
-            params = vars(params)
-        except TypeError:
-            pass
-        if params is not None:
+        else:
             # `add_hparams` requires both - hparams and metric
-            self.experiment.add_hparams(hparam_dict=dict(params), metric_dict={})
+            self.experiment.add_hparams(hparam_dict=params, metric_dict={})
+        # some alternative should be added
+        self.tags.update(params)

     @rank_zero_only
     def log_metrics(self, metrics, step=None):
@@ -89,6 +95,17 @@ def save(self):
+            # you are using PT version (<v1.2) which does not have implemented flush
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
 scikit-learn>=0.20.2
 tqdm>=4.35.0
 numpy>=1.16.4
 torch>=1.1
-torchvision>=0.4.0
+torchvision>=0.4.0, < 0.5  # the 0.5 has some issues with torch JIT
 pandas>=0.24  # lower version do not support py3.7
-test-tube>=0.7.5
+tensorboard>=1.14
 future>=0.17.1  # required for builtins in setup.py
\ No newline at end of file
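Worth illustrating the reworked `log_hyperparams` above: after this change it accepts either a plain dict or an `argparse.Namespace`, normalizes both to a dict, and always records the values on `logger.tags`, presumably so that `save()` can later dump them to `meta_tags.csv` even when `add_hparams` is unavailable. A small sketch, with a hypothetical save directory:

```python
from argparse import Namespace
from pytorch_lightning.logging import TensorBoardLogger

logger = TensorBoardLogger(save_dir='/tmp/tb_logs')  # placeholder directory

# a Namespace is converted via vars(); a plain dict passes straight through
logger.log_hyperparams(Namespace(batch_size=32, hidden_dim=1000))

# on torch < 1.3.0 only a warning is emitted, but the values are still
# cached on the logger for the CSV dump
assert logger.tags['batch_size'] == 32
```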
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 104745175a4a9..dc566d720379e 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -5,7 +5,7 @@ pytest>=3.0.5
 pytest-cov
 flake8
 check-manifest
-# test_tube # already installed in main req.
+test-tube>=0.7.5
 mlflow
 comet_ml
 wandb
diff --git a/tests/test_cpu_models.py b/tests/test_cpu_models.py
index 1e71b8cd395ed..03fe976c44b35 100644
--- a/tests/test_cpu_models.py
+++ b/tests/test_cpu_models.py
@@ -29,7 +29,7 @@ def test_early_stopping_cpu_model(tmpdir):
         show_progress_bar=True,
         logger=tutils.get_test_tube_logger(tmpdir),
         train_percent_check=0.1,
-        val_percent_check=0.1
+        val_percent_check=0.1,
     )

     model, hparams = tutils.get_model()
@@ -51,7 +51,7 @@ def test_lbfgs_cpu_model(tmpdir):
         show_progress_bar=False,
         weights_summary='top',
         train_percent_check=1.0,
-        val_percent_check=0.2
+        val_percent_check=0.2,
     )

     model, hparams = tutils.get_model(use_test_model=True, lbfgs=True)
@@ -70,7 +70,7 @@ def test_default_logger_callbacks_cpu_model(tmpdir):
         print_nan_grads=True,
         show_progress_bar=False,
         train_percent_check=0.01,
-        val_percent_check=0.01
+        val_percent_check=0.01,
     )

     model, hparams = tutils.get_model()
diff --git a/tests/test_logging.py b/tests/test_logging.py
index 41e422b95405e..5467d0aab3c1c 100644
--- a/tests/test_logging.py
+++ b/tests/test_logging.py
@@ -192,6 +192,7 @@ def test_comet_pickle(tmpdir, monkeypatch):
     trainer2 = pickle.loads(pkl_bytes)
     trainer2.logger.log_metrics({"acc": 1.0})

+
 def test_wandb_logger(tmpdir):
     """Verify that basic functionality of wandb logger works."""
     tutils.reset_seed()
@@ -201,6 +202,7 @@ def test_wandb_logger(tmpdir):
     wandb_dir = os.path.join(tmpdir, "wandb")
     logger = WandbLogger(save_dir=wandb_dir, anonymous=True)

+
 def test_neptune_logger(tmpdir):
     """Verify that basic functionality of neptune logger works."""
     tutils.reset_seed()
@@ -223,6 +225,7 @@ def test_neptune_logger(tmpdir):
     print('result finished')
     assert result == 1, "Training failed"

+
 def test_wandb_pickle(tmpdir):
     """Verify that pickling trainer with wandb logger works."""
     tutils.reset_seed()
@@ -230,6 +233,8 @@ def test_wandb_pickle(tmpdir):
     from pytorch_lightning.logging import WandbLogger
     wandb_dir = str(tmpdir)
     logger = WandbLogger(save_dir=wandb_dir, anonymous=True)
+    assert logger is not None
+

 def test_neptune_pickle(tmpdir):
     """Verify that pickling trainer with neptune logger works."""
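The blank lines added above are PEP8 fixes between the logger tests; as background, the `*_pickle` tests exist because a logger has to survive pickling for the trainer to be shipped to other processes (e.g. under DDP). A rough sketch of that round-trip, with a placeholder directory:

```python
import pickle
from pytorch_lightning.logging import TensorBoardLogger

logger = TensorBoardLogger(save_dir='/tmp/tb_logs')  # hypothetical directory
pkl_bytes = pickle.dumps(logger)   # what the test_*_pickle tests exercise
logger2 = pickle.loads(pkl_bytes)
logger2.log_metrics({'acc': 1.0})  # the restored logger is still usable
```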
diff --git a/tests/test_restore_models.py b/tests/test_restore_models.py
index 5a3340b3f0092..02d57b31552c5 100644
--- a/tests/test_restore_models.py
+++ b/tests/test_restore_models.py
@@ -41,12 +41,11 @@ def test_running_test_pretrained_model_ddp(tmpdir):
     trainer = Trainer(**trainer_options)
     result = trainer.fit(model)

-    exp = logger.experiment
-    logging.info(os.listdir(exp.get_data_path(exp.name, exp.version)))
+    logging.info(os.listdir(tutils.get_data_path(logger, path_dir=tmpdir)))

     # correct result and ok accuracy
     assert result == 1, 'training failed to complete'
-    pretrained_model = tutils.load_model(logger.experiment,
+    pretrained_model = tutils.load_model(logger,
                                          trainer.checkpoint_callback.filepath,
                                          module_class=LightningTestModel)
@@ -87,7 +86,7 @@ def test_running_test_pretrained_model(tmpdir):
     # correct result and ok accuracy
     assert result == 1, 'training failed to complete'
     pretrained_model = tutils.load_model(
-        logger.experiment, trainer.checkpoint_callback.filepath, module_class=LightningTestModel
+        logger, trainer.checkpoint_callback.filepath, module_class=LightningTestModel
     )

     new_trainer = Trainer(**trainer_options)
@@ -171,7 +170,7 @@ def test_running_test_pretrained_model_dp(tmpdir):

     # correct result and ok accuracy
     assert result == 1, 'training failed to complete'
-    pretrained_model = tutils.load_model(logger.experiment,
+    pretrained_model = tutils.load_model(logger,
                                          trainer.checkpoint_callback.filepath,
                                          module_class=LightningTestModel)
@@ -361,7 +360,7 @@ def test_model_saving_loading(tmpdir):
     trainer.save_checkpoint(new_weights_path)

     # load new model
-    tags_path = logger.experiment.get_data_path(logger.experiment.name, logger.experiment.version)
+    tags_path = tutils.get_data_path(logger, path_dir=tmpdir)
     tags_path = os.path.join(tags_path, 'meta_tags.csv')
     model_2 = LightningTestModel.load_from_metrics(weights_path=new_weights_path,
                                                    tags_csv=tags_path)
diff --git a/tests/test_trainer.py b/tests/test_trainer.py
index 2e7128584a86f..90430e5c01ff4 100644
--- a/tests/test_trainer.py
+++ b/tests/test_trainer.py
@@ -51,7 +51,7 @@ class CurrentTestModel(LightningTestModelBase):
     trainer.save_checkpoint(new_weights_path)

     # load new model
-    tags_path = logger.experiment.get_data_path(logger.experiment.name, logger.experiment.version)
+    tags_path = tutils.get_data_path(logger, path_dir=tmpdir)
     tags_path = os.path.join(tags_path, 'meta_tags.csv')
     model_2 = LightningTestModel.load_from_metrics(weights_path=new_weights_path,
                                                    tags_csv=tags_path)
@@ -89,7 +89,7 @@ class CurrentTestModel(LightningValidationStepMixin, LightningTestModelBase):
     trainer.save_checkpoint(new_weights_path)

     # load new model
-    tags_path = logger.experiment.get_data_path(logger.experiment.name, logger.experiment.version)
+    tags_path = tutils.get_data_path(logger, path_dir=tmpdir)
     tags_path = os.path.join(tags_path, 'meta_tags.csv')
     model_2 = LightningTestModel.load_from_metrics(weights_path=new_weights_path,
                                                    tags_csv=tags_path)
@@ -184,9 +184,8 @@ def test_loading_meta_tags(tmpdir):
     logger.save()

     # load tags
-    tags_path = logger.experiment.get_data_path(
-        logger.experiment.name, logger.experiment.version
-    ) + '/meta_tags.csv'
+    path_expt_dir = tutils.get_data_path(logger, path_dir=tmpdir)
+    tags_path = os.path.join(path_expt_dir, 'meta_tags.csv')
     tags = training_io.load_hparams_from_tags_csv(tags_path)

     assert tags.batch_size == 32 and tags.hidden_dim == 1000
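Several hunks above re-point `tags_path` at `tutils.get_data_path()` and then read `meta_tags.csv`. For orientation, a rough sketch of the CSV round-trip those tests depend on, assuming a two-column key/value layout (an assumption suggested by the pandas import and `NAME_CSV_TAGS` in `tensorboard.py`, not quoted from this PR):

```python
import pandas as pd
from argparse import Namespace

# write tags the way the TensorBoardLogger presumably persists them
tags = {'batch_size': 32, 'hidden_dim': 1000}
pd.DataFrame({'key': list(tags.keys()),
              'value': list(tags.values())}).to_csv('meta_tags.csv', index=False)

# read them back into a Namespace, roughly what load_hparams_from_tags_csv does
df = pd.read_csv('meta_tags.csv')
hparams = Namespace(**{row['key']: row['value'] for _, row in df.iterrows()})
assert hparams.batch_size == 32 and hparams.hidden_dim == 1000
```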
diff --git a/tests/utils.py b/tests/utils.py
index b7d177b60ef89..baec6274b3724 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,5 +1,4 @@
 import os
-import shutil
 import warnings
 from argparse import Namespace

@@ -8,13 +7,9 @@
 from pl_examples import LightningTemplateModel
 from pytorch_lightning import Trainer
-from pytorch_lightning.callbacks import (
-    ModelCheckpoint,
-)
-from pytorch_lightning.logging import TestTubeLogger
-from pytorch_lightning.testing import (
-    LightningTestModel,
-)
+from pytorch_lightning.callbacks import ModelCheckpoint
+from pytorch_lightning.logging import TestTubeLogger, TensorBoardLogger
+from pytorch_lightning.testing import LightningTestModel

 # generate a list of random seeds for each test
 RANDOM_PORTS = list(np.random.randint(12000, 19000, 1000))
@@ -22,6 +17,7 @@
 torch.manual_seed(ROOT_SEED)
 np.random.seed(ROOT_SEED)
 RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))
+ROOT_PATH = os.path.abspath(os.path.dirname(__file__))


 def run_model_test_no_loggers(trainer_options, model, min_acc=0.50):
@@ -35,8 +31,9 @@ def run_model_test_no_loggers(trainer_options, model, min_acc=0.50):
     assert result == 1, 'amp + ddp model failed to complete'

     # test model loading
-    pretrained_model = load_model(trainer.logger.experiment,
-                                  trainer.checkpoint_callback.filepath)
+    pretrained_model = load_model(trainer.logger,
+                                  trainer.checkpoint_callback.filepath,
+                                  path_expt=trainer_options.get('default_save_path'))

     # test new model accuracy
     for dataloader in model.test_dataloader():
@@ -69,7 +66,7 @@ def run_model_test(trainer_options, model, on_gpu=True):
     assert result == 1, 'amp + ddp model failed to complete'

     # test model loading
-    pretrained_model = load_model(logger.experiment, trainer.checkpoint_callback.filepath)
+    pretrained_model = load_model(logger, trainer.checkpoint_callback.filepath)

     # test new model accuracy
     [run_prediction(dataloader, pretrained_model) for dataloader in model.test_dataloader()]
@@ -127,10 +124,28 @@ def get_test_tube_logger(save_dir, debug=True, version=None):
     return logger


-def load_model(exp, root_weights_dir, module_class=LightningTemplateModel):
+def get_data_path(expt_logger, path_dir=None):
+    # some calls contain only experiment not complete logger
+    expt = expt_logger.experiment if hasattr(expt_logger, 'experiment') else expt_logger
+    # each logger has to have these attributes
+    name, version = expt_logger.name, expt_logger.version
+    # only the test-tube experiment has such attribute
+    if hasattr(expt, 'get_data_path'):
+        return expt.get_data_path(name, version)
+    # the other experiments...
+    if not path_dir:
+        path_dir = ROOT_PATH
+    path_expt = os.path.join(path_dir, name, 'version_%s' % version)
+    # try if the new sub-folder exists, typical case for test-tube
+    if not os.path.isdir(path_expt):
+        path_expt = path_dir
+    return path_expt
+
+
+def load_model(exp, root_weights_dir, module_class=LightningTemplateModel, path_expt=None):
     # load trained model
-    tags_path = exp.get_data_path(exp.name, exp.version)
-    tags_path = os.path.join(tags_path, 'meta_tags.csv')
+    path_expt_dir = get_data_path(exp, path_dir=path_expt)
+    tags_path = os.path.join(path_expt_dir, TensorBoardLogger.NAME_CSV_TAGS)

     checkpoints = [x for x in os.listdir(root_weights_dir) if '.ckpt' in x]
     weights_dir = os.path.join(root_weights_dir, checkpoints[0])
@@ -203,9 +218,8 @@ def set_random_master_port():
     os.environ['MASTER_PORT'] = str(port)


-def init_checkpoint_callback(logger):
-    exp = logger.experiment
-    exp_path = exp.get_data_path(exp.name, exp.version)
+def init_checkpoint_callback(logger, path_dir=None):
+    exp_path = get_data_path(logger, path_dir=path_dir)
     ckpt_dir = os.path.join(exp_path, 'checkpoints')
     checkpoint = ModelCheckpoint(ckpt_dir)
     return checkpoint
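Since `get_data_path()` above now has to serve both logger families, a short usage sketch may help; it assumes `tests/utils.py` is importable as shown, and the printed path is illustrative, following the `'version_%s'` pattern used in the helper:

```python
import tempfile
from pytorch_lightning.logging import TensorBoardLogger
from tests.utils import get_data_path  # the helper added above

save_dir = tempfile.mkdtemp()  # stand-in for a test's tmpdir
logger = TensorBoardLogger(save_dir, name='default')

# a TensorBoard SummaryWriter has no get_data_path(), so the helper joins
# path_dir/name/version_<n>, falling back to path_dir if that folder does
# not exist yet; a test-tube experiment would take the get_data_path branch
path = get_data_path(logger, path_dir=save_dir)
print(path)  # e.g. <save_dir>/default/version_0
```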