Merge pull request #38 from alembics/allow-lowres-3d-animations

Allow low-res 3D animations, i.e. upscale for AdaBins.
Adam Letts committed 3 years ago via GitHub
commit ad97d368c1
Files changed:
  1. Disco_Diffusion.ipynb (34 changed lines)
  2. disco.py (30 changed lines)
  3. disco_xform_utils.py (9 changed lines)

Disco_Diffusion.ipynb
@@ -296,25 +296,25 @@
 "`image_prompts` | Think of these images more as a description of their contents. | N/A\n",
 "**Image quality:**\n",
 "`clip_guidance_scale` | Controls how much the image should look like the prompt. | 1000\n",
-"`tv_scale` | Controls the smoothness of the final output. | 150\n",
-"`range_scale` | Controls how far out of range RGB values are allowed to be. | 150\n",
+"`tv_scale` | Controls the smoothness of the final output. | 150\n",
+"`range_scale` | Controls how far out of range RGB values are allowed to be. | 150\n",
 "`sat_scale` | Controls how much saturation is allowed. From nshepperd's JAX notebook. | 0\n",
 "`cutn` | Controls how many crops to take from the image. | 16\n",
-"`cutn_batches` | Accumulate CLIP gradient from multiple batches of cuts | 2\n",
+"`cutn_batches` | Accumulate CLIP gradient from multiple batches of cuts. | 2\n",
 "**Init settings:**\n",
-"`init_image` | URL or local path | None\n",
-"`init_scale` | This enhances the effect of the init image, a good value is 1000 | 0\n",
+"`init_image` | URL or local path | None\n",
+"`init_scale` | This enhances the effect of the init image, a good value is 1000 | 0\n",
 "`skip_steps` | Controls the starting point along the diffusion timesteps | 0\n",
-"`perlin_init` | Option to start with random perlin noise | False\n",
-"`perlin_mode` | ('gray', 'color') | 'mixed'\n",
+"`perlin_init` | Option to start with random perlin noise | False\n",
+"`perlin_mode` | ('gray', 'color') | 'mixed'\n",
 "**Advanced:**\n",
-"`skip_augs` |Controls whether to skip torchvision augmentations | False\n",
-"`randomize_class` |Controls whether the imagenet class is randomly changed each iteration | True\n",
-"`clip_denoised` |Determines whether CLIP discriminates a noisy or denoised image | False\n",
-"`clamp_grad` |Experimental: Using adaptive clip grad in the cond_fn | True\n",
+"`skip_augs` | Controls whether to skip torchvision augmentations | False\n",
+"`randomize_class` | Controls whether the imagenet class is randomly changed each iteration | True\n",
+"`clip_denoised` | Determines whether CLIP discriminates a noisy or denoised image | False\n",
+"`clamp_grad` | Experimental: Using adaptive clip grad in the cond_fn | True\n",
 "`seed` | Choose a random seed and print it at end of run for reproduction | random_seed\n",
 "`fuzzy_prompt` | Controls whether to add multiple noisy prompts to the prompt losses | False\n",
-"`rand_mag` |Controls the magnitude of the random noise | 0.1\n",
+"`rand_mag` | Controls the magnitude of the random noise | 0.1\n",
 "`eta` | DDIM hyperparameter | 0.5\n",
 "\n",
 "..\n",
@@ -325,10 +325,10 @@
 "Setting | Description | Default\n",
 "--- | --- | ---\n",
 "**Diffusion:**\n",
-"`timestep_respacing` | Modify this value to decrease the number of timesteps. | ddim100\n",
+"`timestep_respacing` | Modify this value to decrease the number of timesteps. | ddim100\n",
 "`diffusion_steps` || 1000\n",
 "**Diffusion:**\n",
-"`clip_models` | Models of CLIP to load. Typically the more, the better but they all come at a hefty VRAM cost. | ViT-B/32, ViT-B/16, RN50x4"
+"`clip_models` | Models of CLIP to load. Typically the more, the better but they all come at a hefty VRAM cost. | ViT-B/32, ViT-B/16, RN50x4"
 ]
 },
 {
@@ -1671,7 +1671,7 @@
 " alphas, sigmas = map(partial(append_dims, n=v.ndim), t_to_alpha_sigma(t))\n",
 " pred = input * alphas - v * sigmas\n",
 " eps = input * sigmas + v * alphas\n",
-" return DiffusionOutput(v, pred, eps)\n"
+" return DiffusionOutput(v, pred, eps)"
 ],
 "outputs": [],
 "execution_count": null
@@ -1894,7 +1894,7 @@
 "\n",
 "#Make folder for batch\n",
 "batchFolder = f'{outDirPath}/{batch_name}'\n",
-"createPath(batchFolder)\n"
+"createPath(batchFolder)"
 ],
 "outputs": [],
 "execution_count": null
@@ -2686,4 +2686,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
+}
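
Note on the notebook hunks: most of the churn above is whitespace-only (a trailing "\n" dropped from the last line of a few cells). The one code cell shown (around line 1671) returns a `DiffusionOutput(v, pred, eps)` built from the v-prediction parameterization: `pred = x*alpha - v*sigma` and `eps = x*sigma + v*alpha`. A minimal standalone sketch of that conversion, assuming the usual cosine schedule for `t_to_alpha_sigma` and a simple right-padding `append_dims` helper — both are assumptions here, not taken from this diff:

```python
import math
from functools import partial
from typing import NamedTuple

import torch

class DiffusionOutput(NamedTuple):
    v: torch.Tensor
    pred: torch.Tensor
    eps: torch.Tensor

def append_dims(x, n):
    # Right-pad x with singleton dims until it has n dims, so a per-sample
    # scalar broadcasts against an image batch of shape (B, C, H, W). (Assumed helper.)
    return x[(...,) + (None,) * (n - x.ndim)]

def t_to_alpha_sigma(t):
    # Cosine schedule commonly paired with v-prediction (assumed here).
    return torch.cos(t * math.pi / 2), torch.sin(t * math.pi / 2)

def v_to_output(input, v, t):
    # Convert a v-prediction into denoised-image and noise estimates.
    alphas, sigmas = map(partial(append_dims, n=v.ndim), t_to_alpha_sigma(t))
    pred = input * alphas - v * sigmas   # estimated clean image x0
    eps = input * sigmas + v * alphas    # estimated noise
    return DiffusionOutput(v, pred, eps)
```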

disco.py
@@ -281,25 +281,25 @@ Setting | Description | Default
 `image_prompts` | Think of these images more as a description of their contents. | N/A
 **Image quality:**
 `clip_guidance_scale` | Controls how much the image should look like the prompt. | 1000
-`tv_scale` | Controls the smoothness of the final output. | 150
-`range_scale` | Controls how far out of range RGB values are allowed to be. | 150
+`tv_scale` | Controls the smoothness of the final output. | 150
+`range_scale` | Controls how far out of range RGB values are allowed to be. | 150
 `sat_scale` | Controls how much saturation is allowed. From nshepperd's JAX notebook. | 0
 `cutn` | Controls how many crops to take from the image. | 16
-`cutn_batches` | Accumulate CLIP gradient from multiple batches of cuts | 2
+`cutn_batches` | Accumulate CLIP gradient from multiple batches of cuts. | 2
 **Init settings:**
-`init_image` | URL or local path | None
-`init_scale` | This enhances the effect of the init image, a good value is 1000 | 0
-`skip_steps Controls the starting point along the diffusion timesteps | 0
-`perlin_init` | Option to start with random perlin noise | False
-`perlin_mode` | ('gray', 'color') | 'mixed'
+`init_image` | URL or local path | None
+`init_scale` | This enhances the effect of the init image, a good value is 1000 | 0
+`skip_steps` | Controls the starting point along the diffusion timesteps | 0
+`perlin_init` | Option to start with random perlin noise | False
+`perlin_mode` | ('gray', 'color') | 'mixed'
 **Advanced:**
-`skip_augs` |Controls whether to skip torchvision augmentations | False
-`randomize_class` |Controls whether the imagenet class is randomly changed each iteration | True
-`clip_denoised` |Determines whether CLIP discriminates a noisy or denoised image | False
-`clamp_grad` |Experimental: Using adaptive clip grad in the cond_fn | True
+`skip_augs` | Controls whether to skip torchvision augmentations | False
+`randomize_class` | Controls whether the imagenet class is randomly changed each iteration | True
+`clip_denoised` | Determines whether CLIP discriminates a noisy or denoised image | False
+`clamp_grad` | Experimental: Using adaptive clip grad in the cond_fn | True
 `seed` | Choose a random seed and print it at end of run for reproduction | random_seed
 `fuzzy_prompt` | Controls whether to add multiple noisy prompts to the prompt losses | False
-`rand_mag` |Controls the magnitude of the random noise | 0.1
+`rand_mag` | Controls the magnitude of the random noise | 0.1
 `eta` | DDIM hyperparameter | 0.5
 ..
@@ -310,10 +310,10 @@ Setting | Description | Default
 Setting | Description | Default
 --- | --- | ---
 **Diffusion:**
-`timestep_respacing` | Modify this value to decrease the number of timesteps. | ddim100
+`timestep_respacing` | Modify this value to decrease the number of timesteps. | ddim100
 `diffusion_steps` || 1000
 **Diffusion:**
-`clip_models` | Models of CLIP to load. Typically the more, the better but they all come at a hefty VRAM cost. | ViT-B/32, ViT-B/16, RN50x4
+`clip_models` | Models of CLIP to load. Typically the more, the better but they all come at a hefty VRAM cost. | ViT-B/32, ViT-B/16, RN50x4
 """
 # %%
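
The disco.py hunks only touch the settings tables in the notebook's documentation string. Purely as an illustration of what those documented defaults look like when set for a run — the names and values come from the tables above; treating them as plain module-level assignments is an assumption about how the notebook consumes them:

```python
# Illustrative defaults taken from the settings tables above; whether the
# notebook reads these as plain globals is an assumption for this sketch.
clip_guidance_scale = 1000   # how strongly the image is pushed toward the prompt
tv_scale = 150               # smoothness of the final output
range_scale = 150            # penalty on out-of-range RGB values
sat_scale = 0                # penalty on oversaturation
cutn = 16                    # crops taken from the image per batch
cutn_batches = 2             # accumulate CLIP gradient over this many cut batches

init_image = None            # URL or local path
init_scale = 0               # raise (e.g. to 1000) to strengthen the init image
skip_steps = 0               # starting point along the diffusion timesteps
perlin_init = False          # start from random perlin noise
perlin_mode = 'mixed'        # 'gray', 'color', or 'mixed'

clamp_grad = True            # experimental adaptive gradient clipping in cond_fn
fuzzy_prompt = False         # add multiple noisy copies of the prompt to the losses
rand_mag = 0.1               # magnitude of the random noise used by fuzzy_prompt
eta = 0.5                    # DDIM hyperparameter
```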

disco_xform_utils.py
@@ -12,6 +12,7 @@ except:
     sys.exit()
 MAX_ADABINS_AREA = 500000
+MIN_ADABINS_AREA = 448*448
 @torch.no_grad()
 def transform_image_3d(img_filepath, midas_model, midas_transform, device, rot_mat=torch.eye(3).unsqueeze(0), translate=(0.,0.,-0.04), near=2000, far=20000, fov_deg=60, padding_mode='border', sampling_mode='bicubic', midas_weight = 0.3):
@@ -33,11 +34,17 @@ def transform_image_3d(img_filepath, midas_model, midas_transform, device, rot_m
     if image_pil_area > MAX_ADABINS_AREA:
         scale = math.sqrt(MAX_ADABINS_AREA) / math.sqrt(image_pil_area)
         depth_input = img_pil.resize((int(w*scale), int(h*scale)), Image.LANCZOS) # LANCZOS is supposed to be good for downsampling.
+    elif image_pil_area < MIN_ADABINS_AREA:
+        scale = math.sqrt(MIN_ADABINS_AREA) / math.sqrt(image_pil_area)
+        depth_input = img_pil.resize((int(w*scale), int(h*scale)), Image.BICUBIC)
     else:
         depth_input = img_pil
     try:
         _, adabins_depth = infer_helper.predict_pil(depth_input)
-        adabins_depth = torchvision.transforms.functional.resize(torch.from_numpy(adabins_depth), image_tensor.shape[-2:], interpolation=torchvision.transforms.functional.InterpolationMode.BICUBIC).squeeze().to(device)
+        if image_pil_area != MAX_ADABINS_AREA:
+            adabins_depth = torchvision.transforms.functional.resize(torch.from_numpy(adabins_depth), image_tensor.shape[-2:], interpolation=torchvision.transforms.functional.InterpolationMode.BICUBIC).squeeze().to(device)
+        else:
+            adabins_depth = torch.from_numpy(adabins_depth).squeeze().to(device)
         adabins_depth_np = adabins_depth.cpu().numpy()
     except:
         pass
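
The disco_xform_utils.py change is the substance of this PR: frames smaller than MIN_ADABINS_AREA (448*448) are now upscaled before AdaBins depth inference instead of being fed in at their native size, and the predicted depth is resized back to the frame's resolution. A standalone sketch of that resize policy, with a hypothetical `predict_depth` callable standing in for AdaBins' `infer_helper.predict_pil`; the thresholds and interpolation choices mirror the diff, everything else is illustrative:

```python
import math

import torch
import torchvision.transforms.functional as TF
from PIL import Image

MAX_ADABINS_AREA = 500000    # downscale above this many pixels (from the diff)
MIN_ADABINS_AREA = 448 * 448 # upscale below this many pixels (added by this PR)

def depth_for_frame(img_pil: Image.Image, predict_depth, device='cpu'):
    """Resize a frame into AdaBins' comfortable area range, run depth
    inference, and return depth at the frame's original resolution.

    `predict_depth` is a hypothetical callable: PIL image -> numpy depth map.
    """
    w, h = img_pil.size
    area = w * h
    if area > MAX_ADABINS_AREA:
        # Downscale; LANCZOS is a good filter for shrinking.
        scale = math.sqrt(MAX_ADABINS_AREA) / math.sqrt(area)
        depth_input = img_pil.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
    elif area < MIN_ADABINS_AREA:
        # New behaviour in this PR: upscale small frames before depth inference.
        scale = math.sqrt(MIN_ADABINS_AREA) / math.sqrt(area)
        depth_input = img_pil.resize((int(w * scale), int(h * scale)), Image.BICUBIC)
    else:
        depth_input = img_pil

    depth = torch.from_numpy(predict_depth(depth_input))
    if depth.ndim == 2:
        depth = depth[None]  # add a channel dim so TF.resize treats (H, W) as spatial
    if depth_input.size != img_pil.size:
        # Bring the depth map back to the frame's own (h, w).
        depth = TF.resize(depth, [h, w], interpolation=TF.InterpolationMode.BICUBIC)
    return depth.squeeze().to(device)
```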
