From 12f95b33d8a2f44ffb48ec1079083ec634c4728f Mon Sep 17 00:00:00 2001
From: Phil Wang
Date: Wed, 27 Jul 2022 12:25:40 -0700
Subject: [PATCH] rescale values to prevent linear attention from overflowing
 in fp16 setting

---
 denoising_diffusion_pytorch/denoising_diffusion_pytorch.py | 2 ++
 setup.py                                                   | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py b/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
index 31212b0bd..9479ca587 100644
--- a/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
+++ b/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
@@ -211,6 +211,8 @@ def forward(self, x):
         k = k.softmax(dim = -1)
         q = q * self.scale
 
+        v = v / (h * w)
+
         context = torch.einsum('b h d n, b h e n -> b h d e', k, v)
 
         out = torch.einsum('b h d e, b h d n -> b h e n', context, q)

diff --git a/setup.py b/setup.py
index ccfd88a85..0eb0bf749 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'denoising-diffusion-pytorch',
   packages = find_packages(),
-  version = '0.26.3',
+  version = '0.26.4',
   license='MIT',
   description = 'Denoising Diffusion Probabilistic Models - Pytorch',
   author = 'Phil Wang',
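
Note: in this linear attention variant, `context = torch.einsum('b h d n, b h e n -> b h d e', k, v)` sums products over all n = h * w spatial positions, and in fp16 those accumulated sums can approach half precision's maximum of 65504. Dividing v by the number of positions bounds the values entering the einsums. Below is a minimal sketch of the patched forward pass; the constructor is paraphrased from the repository, and the exact projection layers at this version are an assumption.

import torch
from torch import nn
from einops import rearrange

class LinearAttention(nn.Module):
    # Sketch of the module this patch touches; the to_qkv / to_out
    # layers are assumed to match the repository's 1x1 conv projections.
    def __init__(self, dim, heads = 4, dim_head = 32):
        super().__init__()
        self.scale = dim_head ** -0.5
        self.heads = heads
        hidden_dim = dim_head * heads
        self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias = False)
        self.to_out = nn.Conv2d(hidden_dim, dim, 1)

    def forward(self, x):
        b, c, h, w = x.shape
        qkv = self.to_qkv(x).chunk(3, dim = 1)
        q, k, v = map(lambda t: rearrange(t, 'b (h c) x y -> b h c (x y)', h = self.heads), qkv)

        k = k.softmax(dim = -1)
        q = q * self.scale

        # the patched line: rescale values so sums over the
        # n = h * w spatial positions stay within fp16 range
        v = v / (h * w)

        context = torch.einsum('b h d n, b h e n -> b h d e', k, v)
        out = torch.einsum('b h d e, b h d n -> b h e n', context, q)
        out = rearrange(out, 'b h c (x y) -> b (h c) x y', h = self.heads, x = h, y = w)
        return self.to_out(out)

Since the division is a constant per resolution, it only rescales the attention output by a factor of 1 / (h * w); downstream weights can absorb that factor during training, which makes this a cheap way to buy fp16 stability.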