From 2b742dd2ccd899f24111b0fba4d3726e65b8c88e Mon Sep 17 00:00:00 2001
From: Phil Wang
Date: Mon, 11 Jul 2022 21:02:06 -0700
Subject: [PATCH] move accelerator backward outside of autocast context, also
 calculate total loss correctly across gradient accumulated steps

---
 README.md                          |  2 +-
 .../denoising_diffusion_pytorch.py | 12 +++++++++---
 setup.py                           |  2 +-
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index ec9b9c0c8..6e4aad12a 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ trainer = Trainer(
     diffusion,
     'path/to/your/images',
     train_batch_size = 32,
-    train_lr = 1e-4,
+    train_lr = 8e-5,
     train_num_steps = 700000,         # total training steps
     gradient_accumulate_every = 2,    # gradient accumulation steps
     ema_decay = 0.995,                # exponential moving average decay
diff --git a/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py b/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
index 4af427e21..549e68303 100644
--- a/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
+++ b/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
@@ -681,6 +681,7 @@ def __init__(
         train_num_steps = 100000,
         ema_update_every = 10,
         ema_decay = 0.995,
+        adam_betas = (0.9, 0.99),
         save_and_sample_every = 1000,
         num_samples = 25,
         results_folder = './results',
@@ -719,7 +720,7 @@ def __init__(
 
         # optimizer
 
-        self.opt = Adam(diffusion_model.parameters(), lr = train_lr)
+        self.opt = Adam(diffusion_model.parameters(), lr = train_lr, betas = adam_betas)
 
         # for logging results in a folder periodically
 
@@ -772,14 +773,19 @@ def train(self):
 
             while self.step < self.train_num_steps:
 
+                total_loss = 0.
+
                 for _ in range(self.gradient_accumulate_every):
                     data = next(self.dl).to(device)
 
                     with self.accelerator.autocast():
                         loss = self.model(data)
-                        self.accelerator.backward(loss / self.gradient_accumulate_every)
+                        loss = loss / self.gradient_accumulate_every
+                        total_loss += loss.item()
+
+                    self.accelerator.backward(loss)
 
-                pbar.set_description(f'loss: {loss.item():.4f}')
+                pbar.set_description(f'loss: {total_loss:.4f}')
 
                 accelerator.wait_for_everyone()
 
diff --git a/setup.py b/setup.py
index 0f9f2fb5c..00d1754ca 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'denoising-diffusion-pytorch',
   packages = find_packages(),
-  version = '0.25.2',
+  version = '0.25.3',
   license='MIT',
   description = 'Denoising Diffusion Probabilistic Models - Pytorch',
   author = 'Phil Wang',
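
Editor's note: below is a minimal standalone sketch (not part of the commit) of the training-loop pattern this patch moves to, using the Hugging Face Accelerate API the repository builds on. The forward pass stays under accelerator.autocast(), accelerator.backward() is called after the autocast block exits, and the scaled per-substep losses are summed into total_loss so the logged value covers the whole accumulated step. The toy model, data, and hyperparameters are illustrative stand-ins, not code from the repository.

import torch
from torch import nn
from accelerate import Accelerator

accelerator = Accelerator()

model = nn.Linear(16, 1)   # toy stand-in for the diffusion model
opt = torch.optim.Adam(model.parameters(), lr = 8e-5, betas = (0.9, 0.99))
model, opt = accelerator.prepare(model, opt)

gradient_accumulate_every = 2
total_loss = 0.

for _ in range(gradient_accumulate_every):
    data = torch.randn(4, 16, device = accelerator.device)  # dummy batch

    with accelerator.autocast():
        loss = model(data).pow(2).mean()           # forward pass under mixed precision
        loss = loss / gradient_accumulate_every    # scale for gradient accumulation
        total_loss += loss.item()                  # running total for logging

    accelerator.backward(loss)                     # backward outside the autocast block

opt.step()
opt.zero_grad()
print(f'loss: {total_loss:.4f}')                   # loss across the full accumulated step

Summing loss.item() over the sub-steps fixes the logging bug the subject line describes: before this patch the progress bar showed only the last micro-batch's unscaled loss rather than the loss of the full accumulated step.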