Update to Disco v5

pull/1/head
Max, 3 years ago
parent 360be272e0
commit 39c867f7ff
Changed files:
1. Disco_Diffusion.ipynb (3084)
2. LICENSE (46)
3. README.md (65)
4. archive/Disco_Diffusion_v3_1_[w_SLIP_&_DangoCutn].ipynb (1446)
5. archive/Disco_Diffusion_v4_1_[w_Video_Inits,_Recovery_&_DDIM_Sharpen].ipynb (0)
6. archive/QoL_MP_Diffusion_v2_[w_Secondary_Model_v2].ipynb (1135)
7. disco_xform_utils.py (108)

File diff suppressed because it is too large.

@@ -0,0 +1,46 @@
Licensed under the MIT License
Copyright (c) 2021 Katherine Crowson
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
--
Licensed under the MIT License
Copyright (c) 2021 Maxwell Ingham
Copyright (c) 2022 Adam Letts
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

@@ -0,0 +1,65 @@
# disco-diffusion
v1 Update: Oct 29th 2021 - Somnai
* QoL improvements added by Somnai (@somnai_dreams), including a user-friendly UI, settings + prompt saving, and improved Google Drive folder organization.

v1.1 Update: Nov 13th 2021 - Somnai
* Now includes sizing options, intermediate saves, and fixes for image prompts and Perlin inits. The batch option is left unexposed since it doesn't work.

v2 Update: Nov 22nd 2021 - Somnai
* Initial addition of Katherine Crowson's Secondary Model Method (https://colab.research.google.com/drive/1mpkrhOjoyzPeSWy2r7T8EYRaU7amYOOi#scrollTo=X5gODNAMEUCR).
* Noticed settings were saving with the wrong name, so corrected it. Let me know if you preferred the old scheme.

v3 Update: Dec 24th 2021 - Somnai
* Implemented Dango's advanced cutout method.
* Added SLIP models, thanks to NeuralDivergent.
* Fixed an issue with NaNs resulting in black images, with massive help and testing from @Softology.
* Perlin now changes properly within batches (not sure where this perlin_regen code came from originally, but thank you).

v4 Update: Jan 2022 - Somnai
* Implemented Diffusion Zooming.
* Added Chigozie keyframing.
* Made a bunch of edits to processes.

v4.1 Update: Jan 14th 2022 - Somnai
* Added video input mode.
* Added the license that somehow went missing.
* Added improved prompt keyframing; fixed image_prompts and multiple prompts.
* Improved UI.
* Significant under-the-hood cleanup and improvement.
* Refined defaults for each mode.
* Added latent-diffusion SuperRes for sharpening.
* Added resume-run mode.

v4.9 Update: Feb 5th 2022 - gandamu / Adam Letts
* Added 3D.
* Added brightness corrections to prevent the animation from steadily going dark over time (a rough sketch of the idea follows this changelog).

v4.91 Update: Feb 19th 2022 - gandamu / Adam Letts
* Cleaned up the 3D implementation and made the associated args accessible via Colab UI elements.

v4.92 Update: Feb 20th 2022 - gandamu / Adam Letts
* Separated the transform code.
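
The brightness-correction code referenced in the v4.9 entry is not visible in this commit (the notebook diff is suppressed below). Purely as a hypothetical illustration of the idea, one way to keep an animation from drifting darker frame to frame is to rescale each frame toward a reference mean brightness; the function and parameter names here are invented for the sketch, not taken from the notebook.

```python
import numpy as np
from PIL import Image

def correct_brightness(frame, target_mean=110.0, strength=0.5):
    # Hypothetical sketch: nudge the frame's mean brightness back toward target_mean.
    # strength=0 leaves the frame untouched; strength=1 rescales it fully.
    arr = np.asarray(frame.convert("RGB")).astype(np.float32)
    current_mean = arr.mean()
    if current_mean <= 0:
        return frame
    gain = 1.0 + strength * (target_mean / current_mean - 1.0)
    return Image.fromarray(np.clip(arr * gain, 0, 255).astype(np.uint8))
```

Applied every frame (or every few frames), a correction of this sort counteracts the cumulative darkening that the changelog entry describes.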

File diff suppressed because it is too large.

File diff suppressed because it is too large.

@@ -0,0 +1,108 @@
import torch, torchvision
import pytorch3d.renderer.cameras as p3dCam
import midas_utils
from PIL import Image
import numpy as np
import sys, math
try:
    from infer import InferenceHelper
except:
    print("disco_xform_utils.py failed to import InferenceHelper. Please ensure that AdaBins directory is in the path (i.e. via sys.path.append('./AdaBins') or other means).")
    sys.exit()
MAX_ADABINS_AREA = 500000
@torch.no_grad()
def transform_image_3d(img_filepath, midas_model, midas_transform, device, rot_mat=torch.eye(3).unsqueeze(0), translate=(0.,0.,-0.04), near=2000, far=20000, fov_deg=60, padding_mode='border', sampling_mode='bicubic', midas_weight = 0.3):
    img_pil = Image.open(open(img_filepath, 'rb')).convert('RGB')
    w, h = img_pil.size
    image_tensor = torchvision.transforms.functional.to_tensor(img_pil).to(device)

    use_adabins = midas_weight < 1.0

    if use_adabins:
        # AdaBins
        """
        predictions using nyu dataset
        """
        print("Running AdaBins depth estimation implementation...")
        infer_helper = InferenceHelper(dataset='nyu')

        image_pil_area = w*h
        if image_pil_area > MAX_ADABINS_AREA:
            scale = math.sqrt(MAX_ADABINS_AREA) / math.sqrt(image_pil_area)
            depth_input = img_pil.resize((int(w*scale), int(h*scale)), Image.LANCZOS) # LANCZOS is supposed to be good for downsampling.
        else:
            depth_input = img_pil
        try:
            _, adabins_depth = infer_helper.predict_pil(depth_input)
            adabins_depth = torchvision.transforms.functional.resize(torch.from_numpy(adabins_depth), image_tensor.shape[-2:], interpolation=torchvision.transforms.functional.InterpolationMode.BICUBIC).squeeze().to(device)
            adabins_depth_np = adabins_depth.cpu().numpy()
        except:
            pass
        torch.cuda.empty_cache()

    # MiDaS
    img_midas = midas_utils.read_image(img_filepath)
    img_midas_input = midas_transform({"image": img_midas})["image"]
    midas_optimize = True

    # MiDaS depth estimation implementation
    print("Running MiDaS depth estimation implementation...")
    sample = torch.from_numpy(img_midas_input).float().to(device).unsqueeze(0)
    if midas_optimize==True and device == torch.device("cuda"):
        sample = sample.to(memory_format=torch.channels_last)
        sample = sample.half()
    prediction_torch = midas_model.forward(sample)
    prediction_torch = torch.nn.functional.interpolate(
        prediction_torch.unsqueeze(1),
        size=img_midas.shape[:2],
        mode="bicubic",
        align_corners=False,
    ).squeeze()
    prediction_np = prediction_torch.clone().cpu().numpy()

    print("Finished depth estimation.")
    torch.cuda.empty_cache()

    # MiDaS makes the near values greater, and the far values lesser. Let's reverse that and try to align with AdaBins a bit better.
    prediction_np = np.subtract(50.0, prediction_np)
    prediction_np = prediction_np / 19.0
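    # Worked example of the rescaling above: a raw MiDaS value of 31.0 maps to
    # (50.0 - 31.0) / 19.0 = 1.0; the constants appear hand-tuned to bring MiDaS
    # output into roughly the same range as the AdaBins depths blended next.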
    if use_adabins:
        adabins_weight = 1.0 - midas_weight
        depth_map = prediction_np*midas_weight + adabins_depth_np*adabins_weight
    else:
        depth_map = prediction_np

    depth_map = np.expand_dims(depth_map, axis=0)
    depth_tensor = torch.from_numpy(depth_map).squeeze().to(device)

    pixel_aspect = 1.0 # really.. the aspect of an individual pixel! (so usually 1.0)
    persp_cam_old = p3dCam.FoVPerspectiveCameras(near, far, pixel_aspect, fov=fov_deg, degrees=True, device=device)
    persp_cam_new = p3dCam.FoVPerspectiveCameras(near, far, pixel_aspect, fov=fov_deg, degrees=True, R=rot_mat, T=torch.tensor([translate]), device=device)

    # range of [-1,1] is important to torch grid_sample's padding handling
    y,x = torch.meshgrid(torch.linspace(-1.,1.,h,dtype=torch.float32,device=device),torch.linspace(-1.,1.,w,dtype=torch.float32,device=device))
    z = torch.as_tensor(depth_tensor, dtype=torch.float32, device=device)

    xyz_old_world = torch.stack((x.flatten(), y.flatten(), z.flatten()), dim=1)

    # Transform the points using pytorch3d. With current functionality, this is overkill and prevents it from working on Windows.
    # If you want it to run on Windows (without pytorch3d), then the transforms (and/or perspective if that's separate) can be done pretty easily without it.
    xyz_old_cam_xy = persp_cam_old.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]
    xyz_new_cam_xy = persp_cam_new.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]

    offset_xy = xyz_new_cam_xy - xyz_old_cam_xy
    # affine_grid theta param expects a batch of 2D mats. Each is 2x3 to do rotation+translation.
    identity_2d_batch = torch.tensor([[1.,0.,0.],[0.,1.,0.]], device=device).unsqueeze(0)
    # coords_2d will have shape (N,H,W,2).. which is also what grid_sample needs.
    coords_2d = torch.nn.functional.affine_grid(identity_2d_batch, [1,1,h,w], align_corners=False)
    offset_coords_2d = coords_2d - torch.reshape(offset_xy, (h,w,2)).unsqueeze(0)

    new_image = torch.nn.functional.grid_sample(image_tensor.add(1/512 - 0.0001).unsqueeze(0), offset_coords_2d, mode=sampling_mode, padding_mode=padding_mode, align_corners=False)
    img_pil = torchvision.transforms.ToPILImage()(new_image.squeeze().clamp(0,1.))

    torch.cuda.empty_cache()

    return img_pil
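
Not part of disco_xform_utils.py: the comment above the projection step in transform_image_3d notes that pytorch3d is overkill here and that the transform could be done without it (e.g. on Windows). As a rough, hypothetical sketch of what that might look like, ignoring pytorch3d's exact sign and NDC conventions and handling only a camera translation (no rotation), the underlying pinhole projection is just x and y divided by depth and scaled by the field of view. The helper names pinhole_project and reprojection_offset are invented for this illustration.

import math
import torch

def pinhole_project(xyz, fov_deg=60.0):
    # Hypothetical helper: project camera-space points of shape (N, 3) to
    # 2D screen coordinates of shape (N, 2) with a simple pinhole model.
    focal = 1.0 / math.tan(math.radians(fov_deg) / 2.0)
    x, y, z = xyz.unbind(dim=1)
    return torch.stack((focal * x / z, focal * y / z), dim=1)

def reprojection_offset(xyz_world, translate=(0.0, 0.0, -0.04), fov_deg=60.0):
    # Hypothetical helper: screen-space offset of each depth point between the
    # original camera pose and a camera translated by `translate` (rotation and
    # library-specific sign conventions are deliberately omitted).
    t = torch.tensor(translate, dtype=xyz_world.dtype, device=xyz_world.device)
    xy_old = pinhole_project(xyz_world, fov_deg)
    xy_new = pinhole_project(xyz_world + t, fov_deg)
    return xy_new - xy_old

In the notebook itself (whose diff is suppressed above), transform_image_3d is presumably called once per animation frame, with the previous frame as img_filepath and a per-frame rotation matrix and translation derived from the animation settings.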