Commit 2093e52

example debugging

awaelchli committed Sep 19, 2020
1 parent 9335769 commit 2093e52
Showing 3 changed files with 85 additions and 2 deletions.
64 changes: 64 additions & 0 deletions pl_examples/overfit.py
@@ -0,0 +1,64 @@
import torch
from torch.nn import Conv2d
from torch.optim import SGD
from torch.utils.data import DataLoader, Dataset
from pytorch_lightning.metrics.regression import MSE
import pytorch_lightning as pl
from pytorch_lightning import Trainer


class MyDataset(Dataset):
    def __init__(self, size=100):
        super(MyDataset, self).__init__()
        # bake the sample index into every pixel so training_step can recover
        # which sample each process received
        self.data = torch.stack([idx * torch.ones(3, 100, 100) for idx in range(size)])
        self.idx_list = []

    def __getitem__(self, idx):
        return self.data[idx]

    def __len__(self):
        return self.data.shape[0]


class MyModel(pl.LightningModule):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv_1 = Conv2d(in_channels=3, out_channels=3, kernel_size=1, stride=1)
        self.loss = MSE()
        self.idx_list = []

    def forward(self, batch):
        return self.conv_1(batch)

    def training_step(self, batch, batch_idx):
        # recover the sample index encoded in the batch (see MyDataset)
        idx = batch[0, 0, 0, 0].detach()
        pred = self.forward(batch)
        loss = pred.sum()
        print(self.trainer.train_dataloader.sampler)
        return {'loss': loss, 'idx': idx}

    def training_epoch_end(self, outputs):
        idx_list = torch.tensor([x['idx'] for x in outputs])
        print('Epoch: {}, device: {}, samples: {}'.format(self.current_epoch, self.device, idx_list))
        return torch.stack([x['loss'] for x in outputs]).mean()

    def setup(self, stage):
        self.dataset = MyDataset()

    def train_dataloader(self):
        # shuffle=False on purpose: with overfit_batches the sampler must not reshuffle
        loader = DataLoader(self.dataset, batch_size=1, num_workers=20, pin_memory=True, shuffle=False)
        return loader

    def configure_optimizers(self):
        return SGD(self.parameters(), lr=0.001)


def main():
    pl_model = MyModel()
    # trainer = Trainer(distributed_backend='ddp', num_nodes=1, gpus=2, overfit_batches=4)
    trainer = Trainer(distributed_backend="ddp_cpu", gpus=2, overfit_batches=5, max_epochs=4,
                      check_val_every_n_epoch=100)
    trainer.fit(pl_model)


if __name__ == '__main__':
    main()
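
The prints in training_step and training_epoch_end exist to show which sample indices each DDP process receives. For reference, the sharding they should reveal can be reproduced without Lightning; a minimal sketch in plain PyTorch, assuming two processes:

import torch
from torch.utils.data import DistributedSampler, TensorDataset

# With shuffle=False, DistributedSampler deterministically partitions the
# indices round-robin across ranks, identically in every epoch.
dataset = TensorDataset(torch.arange(8))
for rank in range(2):
    sampler = DistributedSampler(dataset, num_replicas=2, rank=rank, shuffle=False)
    print(rank, list(sampler))  # rank 0 -> [0, 2, 4, 6], rank 1 -> [1, 3, 5, 7]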
2 changes: 1 addition & 1 deletion pytorch_lightning/trainer/data_loading.py
@@ -110,7 +110,6 @@ def auto_add_sampler(self, dataloader: DataLoader, train: bool) -> DataLoader:
         is_dataloader = isinstance(dataloader, DataLoader)
         # don't manipulate iterable datasets
         is_iterable_ds = has_iterable_dataset(dataloader)
-
         if not is_dataloader or is_iterable_ds:
             return dataloader
         need_dist_sampler = (self.use_ddp or self.use_ddp2 or self.use_horovod or self.use_tpu)
@@ -181,6 +180,7 @@ def reset_train_dataloader(self, model: LightningModule) -> None:
 
         # automatically add samplers
         self.train_dataloader = self.auto_add_sampler(self.train_dataloader, train=True)
+        print(self.train_dataloader.sampler)
 
         self.num_training_batches = len(self.train_dataloader) if has_len(self.train_dataloader) else float('inf')
         self._worker_check(self.train_dataloader, 'train dataloader')
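
auto_add_sampler itself is not shown in full here; its effect under DDP is roughly the following sketch (a simplification for illustration, not Lightning's actual implementation). The point relevant to this commit is that when overfitting, the injected sampler must be created with shuffle=False, which the new test below asserts.

from torch.utils.data import DataLoader, DistributedSampler

def add_distributed_sampler(loader: DataLoader, num_replicas: int, rank: int) -> DataLoader:
    # Rebuild the loader around a DistributedSampler so that each DDP
    # process reads a disjoint, deterministic shard of the dataset.
    sampler = DistributedSampler(loader.dataset, num_replicas=num_replicas,
                                 rank=rank, shuffle=False)
    return DataLoader(loader.dataset, batch_size=loader.batch_size, sampler=sampler,
                      num_workers=loader.num_workers, pin_memory=loader.pin_memory)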
21 changes: 20 additions & 1 deletion tests/trainer/test_trainer_tricks.py
@@ -1,7 +1,9 @@
 from copy import deepcopy
+from unittest.mock import patch, _patch
 
 import pytest
 import torch
-from torch.utils.data import RandomSampler, SequentialSampler, DataLoader
+from torch.utils.data import RandomSampler, SequentialSampler, DataLoader, DistributedSampler
 
 import tests.base.develop_utils as tutils
 from pytorch_lightning import Trainer
@@ -141,6 +143,23 @@ def test_overfit_batch_limits(tmpdir):
     assert loader_num_batches[0] == 10
 
 
+@patch('tests.base.model_template.EvalModelTemplate.train_dataloader')
+def test_overfit_batches_shuffling(train_dataloader_mock, tmpdir):
+    model = EvalModelTemplate()
+    shuffled_loader = DataLoader(model.dataloader(train=True).dataset, batch_size=2, shuffle=True)
+    train_dataloader_mock.return_value = shuffled_loader
+    trainer = Trainer(default_root_dir=tmpdir, overfit_batches=2, max_steps=1, distributed_backend="ddp_spawn", gpus=2)
+    trainer.fit(model)
+    # train_loader = DataLoader(model.train_dataloader().dataset, shuffle=False)
+
+    # trainer.reset_train_dataloader(model)
+    # print(trainer.val_dataloaders)
+    # # print(trainer.train_dataloader)
+    sampler = model.train_dataloader().sampler
+    assert isinstance(sampler, DistributedSampler) and not sampler.shuffle
+
+
 def test_model_reset_correctly(tmpdir):
     """ Check that model weights are correctly reset after scaling batch size. """
     tutils.reset_seed()
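
The @patch decorator in the new test replaces EvalModelTemplate.train_dataloader with a mock whose return_value is the deliberately shuffled loader, so the test can check that overfit_batches overrides the shuffling. A minimal standalone sketch of that mocking pattern, using only the standard library (Model and its method are hypothetical stand-ins):

from unittest.mock import patch

class Model:
    def train_dataloader(self):
        return 'real loader'

# patch.object swaps the method for a MagicMock for the duration of the
# with-block; the code under test sees only the controlled return value.
with patch.object(Model, 'train_dataloader', return_value='shuffled loader'):
    assert Model().train_dataloader() == 'shuffled loader'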
