From 12f95b33d8a2f44ffb48ec1079083ec634c4728f Mon Sep 17 00:00:00 2001
From: Phil Wang
Date: Wed, 27 Jul 2022 12:25:40 -0700
Subject: [PATCH] rescale values to prevent linear attention from overflowing
 in fp16 setting

---
 denoising_diffusion_pytorch/denoising_diffusion_pytorch.py | 2 ++
 setup.py                                                   | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py b/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
index 31212b0bd..9479ca587 100644
--- a/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
+++ b/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
@@ -211,6 +211,8 @@ def forward(self, x):
         k = k.softmax(dim = -1)
         q = q * self.scale
 
+        v = v / (h * w)
+
         context = torch.einsum('b h d n, b h e n -> b h d e', k, v)
 
         out = torch.einsum('b h d e, b h d n -> b h e n', context, q)

diff --git a/setup.py b/setup.py
index ccfd88a85..0eb0bf749 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'denoising-diffusion-pytorch',
   packages = find_packages(),
-  version = '0.26.3',
+  version = '0.26.4',
   license='MIT',
   description = 'Denoising Diffusion Probabilistic Models - Pytorch',
   author = 'Phil Wang',
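
Note: in this linear attention variant, `context = torch.einsum('b h d n, b h e n -> b h d e', k, v)` sums products over all n = h * w spatial positions, and in fp16 those accumulated sums can approach half precision's maximum of 65504. Dividing v by the number of positions bounds the values entering the einsums. Below is a minimal sketch of the patched forward pass; the constructor is paraphrased from the repository, and the exact projection layers at this version are an assumption.

import torch
from torch import nn
from einops import rearrange

class LinearAttention(nn.Module):
    # Sketch of the module this patch touches; the to_qkv / to_out
    # layers are assumed to match the repository's 1x1 conv projections.
    def __init__(self, dim, heads = 4, dim_head = 32):
        super().__init__()
        self.scale = dim_head ** -0.5
        self.heads = heads
        hidden_dim = dim_head * heads
        self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias = False)
        self.to_out = nn.Conv2d(hidden_dim, dim, 1)

    def forward(self, x):
        b, c, h, w = x.shape
        qkv = self.to_qkv(x).chunk(3, dim = 1)
        q, k, v = map(lambda t: rearrange(t, 'b (h c) x y -> b h c (x y)', h = self.heads), qkv)

        k = k.softmax(dim = -1)
        q = q * self.scale

        # the patched line: rescale values so sums over the
        # n = h * w spatial positions stay within fp16 range
        v = v / (h * w)

        context = torch.einsum('b h d n, b h e n -> b h d e', k, v)
        out = torch.einsum('b h d e, b h d n -> b h e n', context, q)
        out = rearrange(out, 'b h c (x y) -> b (h c) x y', h = self.heads, x = h, y = w)
        return self.to_out(out)

Since the division is a constant per resolution, it only rescales the attention output by a factor of 1 / (h * w); downstream weights can absorb that factor during training, which makes this a cheap way to buy fp16 stability.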