Commit 69bcf81

Merge branch 'main' into main

flannerybh authored Jan 1, 2024
2 parents 15fe767 + 0c4f20a

Showing 42 changed files with 1,130 additions and 281 deletions.
30 changes: 13 additions & 17 deletions extras/ip_adapter.py
@@ -2,12 +2,13 @@
import ldm_patched.modules.clip_vision
import safetensors.torch as sf
import ldm_patched.modules.model_management as model_management
-import contextlib
import ldm_patched.ldm.modules.attention as attention

from extras.resampler import Resampler
from ldm_patched.modules.model_patcher import ModelPatcher
from modules.core import numpy_to_pytorch
+from modules.ops import use_patched_ops
+from ldm_patched.modules.ops import manual_cast


SD_V12_CHANNELS = [320] * 4 + [640] * 4 + [1280] * 4 + [1280] * 6 + [640] * 6 + [320] * 6 + [1280] * 2
@@ -116,14 +117,16 @@ def load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path):
clip_extra_context_tokens = ip_state_dict["image_proj"]["proj.weight"].shape[0] // cross_attention_dim
clip_embeddings_dim = None

-    ip_adapter = IPAdapterModel(
-        ip_state_dict,
-        plus=plus,
-        cross_attention_dim=cross_attention_dim,
-        clip_embeddings_dim=clip_embeddings_dim,
-        clip_extra_context_tokens=clip_extra_context_tokens,
-        sdxl_plus=sdxl_plus
-    )
+    with use_patched_ops(manual_cast):
+        ip_adapter = IPAdapterModel(
+            ip_state_dict,
+            plus=plus,
+            cross_attention_dim=cross_attention_dim,
+            clip_embeddings_dim=clip_embeddings_dim,
+            clip_extra_context_tokens=clip_extra_context_tokens,
+            sdxl_plus=sdxl_plus
+        )

ip_adapter.sdxl = sdxl
ip_adapter.load_device = load_device
ip_adapter.offload_device = offload_device
@@ -167,14 +170,7 @@ def preprocess(img, ip_adapter_path):

ldm_patched.modules.model_management.load_model_gpu(clip_vision.patcher)
pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device))

-    if clip_vision.dtype != torch.float32:
-        precision_scope = torch.autocast
-    else:
-        precision_scope = lambda a, b: contextlib.nullcontext(a)
-
-    with precision_scope(ldm_patched.modules.model_management.get_autocast_device(clip_vision.load_device), torch.float32):
-        outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True)
+    outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True)

ip_adapter = entry['ip_adapter']
ip_layers = entry['ip_layers']
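
Note: IPAdapterModel is now constructed inside use_patched_ops(manual_cast), so its layers are created with cast-on-demand ops rather than eagerly initialized fp32 weights; the manual autocast wrapper in preprocess() becomes redundant and is removed. A minimal sketch of the pattern, assuming (hypothetically) that use_patched_ops temporarily swaps the op classes a model builds with:

    import contextlib

    @contextlib.contextmanager
    def use_patched_ops_sketch(namespace, patched):
        # Hypothetical stand-in for modules.ops.use_patched_ops: install the
        # patched Linear/Conv2d classes, let the caller build the model, restore.
        originals = {name: getattr(namespace, name) for name in ("Linear", "Conv2d")}
        try:
            for name in originals:
                setattr(namespace, name, getattr(patched, name))
            yield
        finally:
            for name, original in originals.items():
                setattr(namespace, name, original)
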
5 changes: 2 additions & 3 deletions extras/resampler.py
@@ -108,8 +108,7 @@ def __init__(
)

def forward(self, x):

-        latents = self.latents.repeat(x.size(0), 1, 1)
+        latents = self.latents.repeat(x.size(0), 1, 1).to(x)

x = self.proj_in(x)

@@ -118,4 +117,4 @@ def forward(self, x):
latents = ff(latents) + latents

latents = self.proj_out(latents)
-        return self.norm_out(latents)
\ No newline at end of file
+        return self.norm_out(latents)
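
Note: the .to(x) added in forward() moves (and, if needed, casts) the learned latent queries to match the input in a single call, since Tensor.to() accepts another tensor as a dtype/device template:

    import torch

    latents = torch.zeros(16, 4)                    # e.g. a buffer left in fp32 on CPU
    x = torch.randn(2, 77, 4, dtype=torch.float16)  # input may arrive as fp16 (and on GPU)

    # Tensor.to(other) matches BOTH the dtype and the device of `other`:
    moved = latents.repeat(x.size(0), 1, 1).to(x)
    print(moved.shape, moved.dtype)                 # torch.Size([2, 16, 4]) torch.float16
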
2 changes: 1 addition & 1 deletion fooocus_version.py
@@ -1 +1 @@
-version = '2.1.850'
+version = '2.1.859'
41 changes: 26 additions & 15 deletions ldm_patched/contrib/external.py
@@ -11,7 +11,7 @@
import time
import random

-from PIL import Image, ImageOps
+from PIL import Image, ImageOps, ImageSequence
from PIL.PngImagePlugin import PngInfo
import numpy as np
import safetensors.torch
@@ -1412,17 +1412,30 @@ def INPUT_TYPES(s):
FUNCTION = "load_image"
def load_image(self, image):
image_path = ldm_patched.utils.path_utils.get_annotated_filepath(image)
-        i = Image.open(image_path)
-        i = ImageOps.exif_transpose(i)
-        image = i.convert("RGB")
-        image = np.array(image).astype(np.float32) / 255.0
-        image = torch.from_numpy(image)[None,]
-        if 'A' in i.getbands():
-            mask = np.array(i.getchannel('A')).astype(np.float32) / 255.0
-            mask = 1. - torch.from_numpy(mask)
+        img = Image.open(image_path)
+        output_images = []
+        output_masks = []
+        for i in ImageSequence.Iterator(img):
+            i = ImageOps.exif_transpose(i)
+            image = i.convert("RGB")
+            image = np.array(image).astype(np.float32) / 255.0
+            image = torch.from_numpy(image)[None,]
+            if 'A' in i.getbands():
+                mask = np.array(i.getchannel('A')).astype(np.float32) / 255.0
+                mask = 1. - torch.from_numpy(mask)
+            else:
+                mask = torch.zeros((64,64), dtype=torch.float32, device="cpu")
+            output_images.append(image)
+            output_masks.append(mask.unsqueeze(0))
+
+        if len(output_images) > 1:
+            output_image = torch.cat(output_images, dim=0)
+            output_mask = torch.cat(output_masks, dim=0)
         else:
-            mask = torch.zeros((64,64), dtype=torch.float32, device="cpu")
-        return (image, mask.unsqueeze(0))
+            output_image = output_images[0]
+            output_mask = output_masks[0]
+
+        return (output_image, output_mask)

@classmethod
def IS_CHANGED(s, image):
@@ -1480,13 +1493,10 @@ def IS_CHANGED(s, image, channel):
return m.digest().hex()

@classmethod
-    def VALIDATE_INPUTS(s, image, channel):
+    def VALIDATE_INPUTS(s, image):
        if not ldm_patched.utils.path_utils.exists_annotated_filepath(image):
            return "Invalid image file: {}".format(image)

-        if channel not in s._color_channels:
-            return "Invalid color channel: {}".format(channel)
-
        return True

class ImageScale:
@@ -1871,6 +1881,7 @@ def init_custom_nodes():
"nodes_video_model.py",
"nodes_sag.py",
"nodes_perpneg.py",
"nodes_stable3d.py",
]

for node_file in extras_files:
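
Note: LoadImage now walks every frame of the file with PIL.ImageSequence, so animated inputs (GIF, multi-frame TIFF/WebP) come back as a batched tensor instead of only the first frame. The same pattern as a standalone sketch (the path is a placeholder):

    import numpy as np
    import torch
    from PIL import Image, ImageOps, ImageSequence

    img = Image.open("animation.gif")             # placeholder path
    frames = []
    for frame in ImageSequence.Iterator(img):     # yields one frame for still images
        frame = ImageOps.exif_transpose(frame)    # honor EXIF orientation per frame
        arr = np.array(frame.convert("RGB")).astype(np.float32) / 255.0
        frames.append(torch.from_numpy(arr)[None,])   # (1, H, W, C)

    batch = torch.cat(frames, dim=0) if len(frames) > 1 else frames[0]
    print(batch.shape)                            # (num_frames, H, W, C)
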
6 changes: 4 additions & 2 deletions ldm_patched/contrib/external_custom_sampler.py
@@ -89,15 +89,17 @@ def INPUT_TYPES(s):
return {"required":
{"model": ("MODEL",),
"steps": ("INT", {"default": 1, "min": 1, "max": 10}),
"denoise": ("FLOAT", {"default": 1.0, "min": 0, "max": 1.0, "step": 0.01}),
}
}
RETURN_TYPES = ("SIGMAS",)
CATEGORY = "sampling/custom_sampling/schedulers"

FUNCTION = "get_sigmas"

-    def get_sigmas(self, model, steps):
-        timesteps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))[:steps]
+    def get_sigmas(self, model, steps, denoise):
+        start_step = 10 - int(10 * denoise)
+        timesteps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))[start_step:start_step + steps]
sigmas = model.model.model_sampling.sigma(timesteps)
sigmas = torch.cat([sigmas, sigmas.new_zeros([1])])
return (sigmas, )
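
Note: this scheduler keeps a fixed ladder of ten timesteps, [999, 899, ..., 99], and the new denoise input selects where sampling starts on that ladder via start_step = 10 - int(10 * denoise). A quick check of the arithmetic:

    import torch

    ladder = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))
    print(ladder)  # tensor([999, 899, 799, 699, 599, 499, 399, 299, 199,  99])

    def pick(steps, denoise):
        start_step = 10 - int(10 * denoise)
        return ladder[start_step:start_step + steps]

    print(pick(steps=1, denoise=1.0))  # tensor([999]) -> start from pure noise
    print(pick(steps=1, denoise=0.5))  # tensor([499]) -> img2img-style partial denoise
    print(pick(steps=2, denoise=0.5))  # tensor([499, 399])
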
4 changes: 2 additions & 2 deletions ldm_patched/contrib/external_images.py
@@ -76,7 +76,7 @@ def INPUT_TYPES(s):

OUTPUT_NODE = True

CATEGORY = "_for_testing"
CATEGORY = "image/animation"

def save_images(self, images, fps, filename_prefix, lossless, quality, method, num_frames=0, prompt=None, extra_pnginfo=None):
method = self.methods.get(method)
@@ -138,7 +138,7 @@ def INPUT_TYPES(s):

OUTPUT_NODE = True

CATEGORY = "_for_testing"
CATEGORY = "image/animation"

def save_images(self, images, fps, compress_level, filename_prefix="ldm_patched", prompt=None, extra_pnginfo=None):
filename_prefix += self.prefix_append
3 changes: 2 additions & 1 deletion ldm_patched/contrib/external_mask.py
@@ -8,6 +8,7 @@
from ldm_patched.contrib.external import MAX_RESOLUTION

def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False):
+    source = source.to(destination.device)
if resize_source:
source = torch.nn.functional.interpolate(source, size=(destination.shape[2], destination.shape[3]), mode="bilinear")

@@ -22,7 +23,7 @@ def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False):
if mask is None:
mask = torch.ones_like(source)
else:
-        mask = mask.clone()
+        mask = mask.to(destination.device, copy=True)
mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(source.shape[2], source.shape[3]), mode="bilinear")
mask = ldm_patched.modules.utils.repeat_to_batch_size(mask, source.shape[0])

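
Note: both composite() edits pin the incoming tensors to destination's device before any arithmetic. Tensor.to(device, copy=True) replaces the earlier clone(): it moves and copies in one call, and still returns a fresh tensor even when the mask is already on the target device, preserving clone()'s protection against in-place modification:

    import torch

    mask = torch.ones(1, 1, 4, 4)             # e.g. arrives on CPU
    destination = torch.zeros(1, 4, 32, 32)   # may live on "cuda" in practice

    moved = mask.to(destination.device, copy=True)
    assert moved.data_ptr() != mask.data_ptr()  # copy=True always makes a fresh tensor
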
32 changes: 31 additions & 1 deletion ldm_patched/contrib/external_rebatch.py
@@ -101,10 +101,40 @@ def rebatch(self, latents, batch_size):

return (output_list,)

+class ImageRebatch:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "images": ("IMAGE",),
+                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
+                              }}
+    RETURN_TYPES = ("IMAGE",)
+    INPUT_IS_LIST = True
+    OUTPUT_IS_LIST = (True, )
+
+    FUNCTION = "rebatch"
+
+    CATEGORY = "image/batch"
+
+    def rebatch(self, images, batch_size):
+        batch_size = batch_size[0]
+
+        output_list = []
+        all_images = []
+        for img in images:
+            for i in range(img.shape[0]):
+                all_images.append(img[i:i+1])
+
+        for i in range(0, len(all_images), batch_size):
+            output_list.append(torch.cat(all_images[i:i+batch_size], dim=0))
+
+        return (output_list,)
+
NODE_CLASS_MAPPINGS = {
    "RebatchLatents": LatentRebatch,
+    "RebatchImages": ImageRebatch,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "RebatchLatents": "Rebatch Latents",
-}
+    "RebatchImages": "Rebatch Images",
+}
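
Note: ImageRebatch flattens whatever mix of image batches it receives into single frames, then regroups them into batches of at most batch_size. The core logic outside the node wrapper:

    import torch

    # two incoming (N, H, W, C) batches of different sizes: 3 + 2 = 5 frames total
    images = [torch.randn(3, 64, 64, 3), torch.randn(2, 64, 64, 3)]
    batch_size = 4

    all_images = []
    for img in images:
        for i in range(img.shape[0]):
            all_images.append(img[i:i+1])      # split every batch into single frames

    output_list = []
    for i in range(0, len(all_images), batch_size):
        output_list.append(torch.cat(all_images[i:i+batch_size], dim=0))

    print([t.shape[0] for t in output_list])   # [4, 1] -> full batch plus remainder
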
2 changes: 1 addition & 1 deletion ldm_patched/contrib/external_sag.py
@@ -153,7 +153,7 @@ def post_cfg_function(args):
(sag, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, uncond, None, degraded_noised, sigma, model_options)
return cfg_result + (degraded - sag) * sag_scale

-        m.set_model_sampler_post_cfg_function(post_cfg_function)
+        m.set_model_sampler_post_cfg_function(post_cfg_function, disable_cfg1_optimization=True)

# from diffusers:
# unet.mid_block.attentions[0].transformer_blocks[0].attn1.patch
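
Note: Self-Attention Guidance does its work in a post-CFG hook, which needs the unconditional branch even at cfg == 1; disable_cfg1_optimization=True stops the sampler from skipping that branch as a cfg=1 shortcut. Schematically (the args key below is an assumption, not confirmed by this diff):

    def post_cfg_function(args):
        cfg_result = args["denoised"]   # assumed key for the CFG-combined output
        # ...adjust cfg_result using the degraded/uncond prediction here...
        return cfg_result

    m.set_model_sampler_post_cfg_function(post_cfg_function, disable_cfg1_optimization=True)
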
60 changes: 60 additions & 0 deletions ldm_patched/contrib/external_stable3d.py
@@ -0,0 +1,60 @@
# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py

import torch
import ldm_patched.contrib.external
import ldm_patched.modules.utils

def camera_embeddings(elevation, azimuth):
elevation = torch.as_tensor([elevation])
azimuth = torch.as_tensor([azimuth])
embeddings = torch.stack(
[
torch.deg2rad(
(90 - elevation) - (90)
), # Zero123 polar is 90-elevation
torch.sin(torch.deg2rad(azimuth)),
torch.cos(torch.deg2rad(azimuth)),
torch.deg2rad(
90 - torch.full_like(elevation, 0)
),
], dim=-1).unsqueeze(1)

return embeddings


class StableZero123_Conditioning:
@classmethod
def INPUT_TYPES(s):
return {"required": { "clip_vision": ("CLIP_VISION",),
"init_image": ("IMAGE",),
"vae": ("VAE",),
"width": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}),
"height": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
"elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}),
"azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}),
}}
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "negative", "latent")

FUNCTION = "encode"

CATEGORY = "conditioning/3d_models"

def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth):
output = clip_vision.encode_image(init_image)
pooled = output.image_embeds.unsqueeze(0)
pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
encode_pixels = pixels[:,:,:,:3]
t = vae.encode(encode_pixels)
cam_embeds = camera_embeddings(elevation, azimuth)
cond = torch.cat([pooled, cam_embeds.repeat((pooled.shape[0], 1, 1))], dim=-1)

positive = [[cond, {"concat_latent_image": t}]]
negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]]
latent = torch.zeros([batch_size, 4, height // 8, width // 8])
return (positive, negative, {"samples":latent})

NODE_CLASS_MAPPINGS = {
"StableZero123_Conditioning": StableZero123_Conditioning,
}
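
Note: camera_embeddings packs the Zero123 camera pose into four numbers: the polar offset in radians (Zero123 uses 90 - elevation, so the first entry is -elevation converted to radians), sin and cos of the azimuth, and a constant deg2rad(90). A worked check, assuming the camera_embeddings definition above is in scope:

    import torch

    emb = camera_embeddings(elevation=0.0, azimuth=0.0)
    print(emb.shape)  # torch.Size([1, 1, 4])
    print(emb)        # tensor([[[0.0000, 0.0000, 1.0000, 1.5708]]])
                      # = [-elevation in rad, sin(azimuth), cos(azimuth), pi/2]
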
5 changes: 3 additions & 2 deletions ldm_patched/ldm/models/autoencoder.py
@@ -8,6 +8,7 @@

from ldm_patched.ldm.util import instantiate_from_config
from ldm_patched.ldm.modules.ema import LitEma
+import ldm_patched.modules.ops

class DiagonalGaussianRegularizer(torch.nn.Module):
def __init__(self, sample: bool = True):
@@ -161,12 +162,12 @@ def __init__(self, embed_dim: int, **kwargs):
},
**kwargs,
)
-        self.quant_conv = torch.nn.Conv2d(
+        self.quant_conv = ldm_patched.modules.ops.disable_weight_init.Conv2d(
            (1 + ddconfig["double_z"]) * ddconfig["z_channels"],
            (1 + ddconfig["double_z"]) * embed_dim,
            1,
        )
-        self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
+        self.post_quant_conv = ldm_patched.modules.ops.disable_weight_init.Conv2d(embed_dim, ddconfig["z_channels"], 1)
self.embed_dim = embed_dim

def get_autoencoder_params(self) -> list:
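
Note: swapping torch.nn.Conv2d for disable_weight_init.Conv2d skips the default random weight initialization, which is wasted work when a checkpoint is loaded immediately afterwards. A sketch of how such a class is typically built (an assumption about this codebase's internals, shown for illustration):

    import torch

    class Conv2dNoInit(torch.nn.Conv2d):
        def reset_parameters(self):
            # nn.Conv2d.__init__ calls reset_parameters(); making it a no-op
            # skips Kaiming init, since loading the state_dict supplies weights.
            return None
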
2 changes: 1 addition & 1 deletion ldm_patched/ldm/modules/diffusionmodules/model.py
@@ -41,7 +41,7 @@ def nonlinearity(x):


def Normalize(in_channels, num_groups=32):
-    return torch.nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True)
+    return ops.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True)


class Upsample(nn.Module):
4 changes: 2 additions & 2 deletions ldm_patched/ldm/modules/diffusionmodules/upscaling.py
@@ -43,8 +43,8 @@ def register_schedule(self, beta_schedule="linear", timesteps=1000,

def q_sample(self, x_start, t, noise=None):
noise = default(noise, lambda: torch.randn_like(x_start))
-        return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start +
-                extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise)
+        return (extract_into_tensor(self.sqrt_alphas_cumprod.to(x_start.device), t, x_start.shape) * x_start +
+                extract_into_tensor(self.sqrt_one_minus_alphas_cumprod.to(x_start.device), t, x_start.shape) * noise)

def forward(self, x):
return x, None
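
Note: q_sample is the standard DDPM forward-noising step,

    x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise

where self.sqrt_alphas_cumprod holds the precomputed sqrt(alpha_bar_t) table; the edit only moves those schedule buffers onto x_start's device before indexing, so CPU-resident schedules work with GPU inputs.
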
6 changes: 3 additions & 3 deletions ldm_patched/ldm/modules/diffusionmodules/util.py
@@ -51,17 +51,17 @@ def get_alpha(self, image_only_indicator: torch.Tensor) -> torch.Tensor:
if self.merge_strategy == "fixed":
# make shape compatible
# alpha = repeat(self.mix_factor, '1 -> b () t () ()', t=t, b=bs)
-            alpha = self.mix_factor
+            alpha = self.mix_factor.to(image_only_indicator.device)
elif self.merge_strategy == "learned":
-            alpha = torch.sigmoid(self.mix_factor)
+            alpha = torch.sigmoid(self.mix_factor.to(image_only_indicator.device))
# make shape compatible
# alpha = repeat(alpha, '1 -> s () ()', s = t * bs)
elif self.merge_strategy == "learned_with_images":
assert image_only_indicator is not None, "need image_only_indicator ..."
alpha = torch.where(
image_only_indicator.bool(),
torch.ones(1, 1, device=image_only_indicator.device),
-                rearrange(torch.sigmoid(self.mix_factor), "... -> ... 1"),
+                rearrange(torch.sigmoid(self.mix_factor.to(image_only_indicator.device)), "... -> ... 1"),
)
alpha = rearrange(alpha, self.rearrange_pattern)
# make shape compatible
4 changes: 2 additions & 2 deletions ldm_patched/ldm/modules/encoders/noise_aug_modules.py
@@ -15,12 +15,12 @@ def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs):

def scale(self, x):
# re-normalize to centered mean and unit variance
-        x = (x - self.data_mean) * 1. / self.data_std
+        x = (x - self.data_mean.to(x.device)) * 1. / self.data_std.to(x.device)
return x

def unscale(self, x):
# back to original data stats
-        x = (x * self.data_std) + self.data_mean
+        x = (x * self.data_std.to(x.device)) + self.data_mean.to(x.device)
return x

def forward(self, x, noise_level=None):
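
Note: several files in this commit apply the same fix: buffers registered on CPU (mix_factor, data_mean, data_std, the alpha-cumprod tables) are moved to the input's device at the point of use, rather than assuming the whole module was already transferred. A minimal reproduction of the failure mode and the fix, with generic placeholder names:

    import torch

    class Scaler(torch.nn.Module):
        def __init__(self):
            super().__init__()
            # buffers stay on CPU unless the module itself is moved
            self.register_buffer("data_mean", torch.zeros(4))
            self.register_buffer("data_std", torch.ones(4))

        def scale(self, x):
            # without .to(x.device), a CUDA input against CPU buffers raises
            # "Expected all tensors to be on the same device"
            return (x - self.data_mean.to(x.device)) / self.data_std.to(x.device)
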