Update to Disco v5

pull/1/head
Max, 3 years ago
parent 360be272e0
commit 39c867f7ff
Changed files:
1. Disco_Diffusion.ipynb (3084)
2. LICENSE (46)
3. README.md (65)
4. archive/Disco_Diffusion_v3_1_[w_SLIP_&_DangoCutn].ipynb (1446)
5. archive/Disco_Diffusion_v4_1_[w_Video_Inits,_Recovery_&_DDIM_Sharpen].ipynb (0)
6. archive/QoL_MP_Diffusion_v2_[w_Secondary_Model_v2].ipynb (1135)
7. disco_xform_utils.py (108)

File diff suppressed because it is too large.

@@ -0,0 +1,46 @@
Licensed under the MIT License
Copyright (c) 2021 Katherine Crowson
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
--
Licensed under the MIT License
Copyright (c) 2021 Maxwell Ingham
Copyright (c) 2022 Adam Letts
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

@@ -0,0 +1,65 @@
# disco-diffusion
v1 Update: Oct 29th 2021 - Somnai
* QoL improvements added by Somnai (@somnai_dreams), including a user-friendly UI, settings + prompt saving, and improved Google Drive folder organization.

v1.1 Update: Nov 13th 2021 - Somnai
* Now includes sizing options, intermediate saves, and fixes for image prompts and Perlin inits. The batch option is left unexposed since it doesn't work.

v2 Update: Nov 22nd 2021 - Somnai
* Initial addition of Katherine Crowson's Secondary Model Method (https://colab.research.google.com/drive/1mpkrhOjoyzPeSWy2r7T8EYRaU7amYOOi#scrollTo=X5gODNAMEUCR).
* Noticed settings were saving with the wrong name, so corrected it. Let me know if you preferred the old scheme.

v3 Update: Dec 24th 2021 - Somnai
* Implemented Dango's advanced cutout method.
* Added SLIP models, thanks to NeuralDivergent.
* Fixed an issue with NaNs resulting in black images, with massive help and testing from @Softology.
* Perlin now changes properly within batches (not sure where this perlin_regen code came from originally, but thank you).

v4 Update: Jan 2022 - Somnai
* Implemented Diffusion Zooming.
* Added Chigozie keyframing.
* Made a bunch of edits to processes.

v4.1 Update: Jan 14th 2022 - Somnai
* Added video input mode.
* Added the license that somehow went missing.
* Added improved prompt keyframing; fixed image_prompts and multiple prompts.
* Improved UI.
* Significant under-the-hood cleanup and improvement.
* Refined defaults for each mode.
* Added latent-diffusion SuperRes for sharpening.
* Added resume-run mode.

v4.9 Update: Feb 5th 2022 - gandamu / Adam Letts
* Added 3D.
* Added brightness corrections to prevent the animation from steadily going dark over time (a rough sketch of the idea follows this changelog).

v4.91 Update: Feb 19th 2022 - gandamu / Adam Letts
* Cleaned up the 3D implementation and made the associated args accessible via Colab UI elements.

v4.92 Update: Feb 20th 2022 - gandamu / Adam Letts
* Separated the transform code.
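
The brightness-correction code referenced in the v4.9 entry is not visible in this commit (the notebook diff is suppressed below). Purely as a hypothetical illustration of the idea, one way to keep an animation from drifting darker frame to frame is to rescale each frame toward a reference mean brightness; the function and parameter names here are invented for the sketch, not taken from the notebook.

```python
import numpy as np
from PIL import Image

def correct_brightness(frame, target_mean=110.0, strength=0.5):
    # Hypothetical sketch: nudge the frame's mean brightness back toward target_mean.
    # strength=0 leaves the frame untouched; strength=1 rescales it fully.
    arr = np.asarray(frame.convert("RGB")).astype(np.float32)
    current_mean = arr.mean()
    if current_mean <= 0:
        return frame
    gain = 1.0 + strength * (target_mean / current_mean - 1.0)
    return Image.fromarray(np.clip(arr * gain, 0, 255).astype(np.uint8))
```

Applied every frame (or every few frames), a correction of this sort counteracts the cumulative darkening that the changelog entry describes.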

File diff suppressed because it is too large.

File diff suppressed because it is too large.

@@ -0,0 +1,108 @@
import torch, torchvision
import pytorch3d.renderer.cameras as p3dCam
import midas_utils
from PIL import Image
import numpy as np
import sys, math
try:
    from infer import InferenceHelper
except:
    print("disco_xform_utils.py failed to import InferenceHelper. Please ensure that AdaBins directory is in the path (i.e. via sys.path.append('./AdaBins') or other means).")
    sys.exit()
MAX_ADABINS_AREA = 500000
@torch.no_grad()
def transform_image_3d(img_filepath, midas_model, midas_transform, device, rot_mat=torch.eye(3).unsqueeze(0), translate=(0.,0.,-0.04), near=2000, far=20000, fov_deg=60, padding_mode='border', sampling_mode='bicubic', midas_weight = 0.3):
    img_pil = Image.open(open(img_filepath, 'rb')).convert('RGB')
    w, h = img_pil.size
    image_tensor = torchvision.transforms.functional.to_tensor(img_pil).to(device)

    use_adabins = midas_weight < 1.0

    if use_adabins:
        # AdaBins
        """
        predictions using nyu dataset
        """
        print("Running AdaBins depth estimation implementation...")
        infer_helper = InferenceHelper(dataset='nyu')

        image_pil_area = w*h
        if image_pil_area > MAX_ADABINS_AREA:
            scale = math.sqrt(MAX_ADABINS_AREA) / math.sqrt(image_pil_area)
            depth_input = img_pil.resize((int(w*scale), int(h*scale)), Image.LANCZOS) # LANCZOS is supposed to be good for downsampling.
        else:
            depth_input = img_pil
        try:
            _, adabins_depth = infer_helper.predict_pil(depth_input)
            adabins_depth = torchvision.transforms.functional.resize(torch.from_numpy(adabins_depth), image_tensor.shape[-2:], interpolation=torchvision.transforms.functional.InterpolationMode.BICUBIC).squeeze().to(device)
            adabins_depth_np = adabins_depth.cpu().numpy()
        except:
            pass
        torch.cuda.empty_cache()

    # MiDaS
    img_midas = midas_utils.read_image(img_filepath)
    img_midas_input = midas_transform({"image": img_midas})["image"]
    midas_optimize = True

    # MiDaS depth estimation implementation
    print("Running MiDaS depth estimation implementation...")
    sample = torch.from_numpy(img_midas_input).float().to(device).unsqueeze(0)
    if midas_optimize==True and device == torch.device("cuda"):
        sample = sample.to(memory_format=torch.channels_last)
        sample = sample.half()
    prediction_torch = midas_model.forward(sample)
    prediction_torch = torch.nn.functional.interpolate(
        prediction_torch.unsqueeze(1),
        size=img_midas.shape[:2],
        mode="bicubic",
        align_corners=False,
    ).squeeze()
    prediction_np = prediction_torch.clone().cpu().numpy()

    print("Finished depth estimation.")
    torch.cuda.empty_cache()

    # MiDaS makes the near values greater, and the far values lesser. Let's reverse that and try to align with AdaBins a bit better.
    prediction_np = np.subtract(50.0, prediction_np)
    prediction_np = prediction_np / 19.0
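    # Worked example of the rescaling above: a raw MiDaS value of 31.0 maps to
    # (50.0 - 31.0) / 19.0 = 1.0; the constants appear hand-tuned to bring MiDaS
    # output into roughly the same range as the AdaBins depths blended next.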
    if use_adabins:
        adabins_weight = 1.0 - midas_weight
        depth_map = prediction_np*midas_weight + adabins_depth_np*adabins_weight
    else:
        depth_map = prediction_np

    depth_map = np.expand_dims(depth_map, axis=0)
    depth_tensor = torch.from_numpy(depth_map).squeeze().to(device)

    pixel_aspect = 1.0 # really.. the aspect of an individual pixel! (so usually 1.0)
    persp_cam_old = p3dCam.FoVPerspectiveCameras(near, far, pixel_aspect, fov=fov_deg, degrees=True, device=device)
    persp_cam_new = p3dCam.FoVPerspectiveCameras(near, far, pixel_aspect, fov=fov_deg, degrees=True, R=rot_mat, T=torch.tensor([translate]), device=device)

    # range of [-1,1] is important to torch grid_sample's padding handling
    y,x = torch.meshgrid(torch.linspace(-1.,1.,h,dtype=torch.float32,device=device),torch.linspace(-1.,1.,w,dtype=torch.float32,device=device))
    z = torch.as_tensor(depth_tensor, dtype=torch.float32, device=device)

    xyz_old_world = torch.stack((x.flatten(), y.flatten(), z.flatten()), dim=1)

    # Transform the points using pytorch3d. With current functionality, this is overkill and prevents it from working on Windows.
    # If you want it to run on Windows (without pytorch3d), then the transforms (and/or perspective if that's separate) can be done pretty easily without it.
    xyz_old_cam_xy = persp_cam_old.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]
    xyz_new_cam_xy = persp_cam_new.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]

    offset_xy = xyz_new_cam_xy - xyz_old_cam_xy
    # affine_grid theta param expects a batch of 2D mats. Each is 2x3 to do rotation+translation.
    identity_2d_batch = torch.tensor([[1.,0.,0.],[0.,1.,0.]], device=device).unsqueeze(0)
    # coords_2d will have shape (N,H,W,2).. which is also what grid_sample needs.
    coords_2d = torch.nn.functional.affine_grid(identity_2d_batch, [1,1,h,w], align_corners=False)
    offset_coords_2d = coords_2d - torch.reshape(offset_xy, (h,w,2)).unsqueeze(0)

    new_image = torch.nn.functional.grid_sample(image_tensor.add(1/512 - 0.0001).unsqueeze(0), offset_coords_2d, mode=sampling_mode, padding_mode=padding_mode, align_corners=False)
    img_pil = torchvision.transforms.ToPILImage()(new_image.squeeze().clamp(0,1.))

    torch.cuda.empty_cache()

    return img_pil
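
Not part of disco_xform_utils.py: the comment above the projection step in transform_image_3d notes that pytorch3d is overkill here and that the transform could be done without it (e.g. on Windows). As a rough, hypothetical sketch of what that might look like, ignoring pytorch3d's exact sign and NDC conventions and handling only a camera translation (no rotation), the underlying pinhole projection is just x and y divided by depth and scaled by the field of view. The helper names pinhole_project and reprojection_offset are invented for this illustration.

import math
import torch

def pinhole_project(xyz, fov_deg=60.0):
    # Hypothetical helper: project camera-space points of shape (N, 3) to
    # 2D screen coordinates of shape (N, 2) with a simple pinhole model.
    focal = 1.0 / math.tan(math.radians(fov_deg) / 2.0)
    x, y, z = xyz.unbind(dim=1)
    return torch.stack((focal * x / z, focal * y / z), dim=1)

def reprojection_offset(xyz_world, translate=(0.0, 0.0, -0.04), fov_deg=60.0):
    # Hypothetical helper: screen-space offset of each depth point between the
    # original camera pose and a camera translated by `translate` (rotation and
    # library-specific sign conventions are deliberately omitted).
    t = torch.tensor(translate, dtype=xyz_world.dtype, device=xyz_world.device)
    xy_old = pinhole_project(xyz_world, fov_deg)
    xy_new = pinhole_project(xyz_world + t, fov_deg)
    return xy_new - xy_old

In the notebook itself (whose diff is suppressed above), transform_image_3d is presumably called once per animation frame, with the previous frame as img_filepath and a per-frame rotation matrix and translation derived from the animation settings.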