Merge pull request #38 from alembics/allow-lowres-3d-animations

Allow low-res 3D animations, i.e. upscale for AdaBins.
Adam Letts committed 3 years ago via GitHub
commit ad97d368c1
Files changed:
  1. Disco_Diffusion.ipynb (34 changed lines)
  2. disco.py (30 changed lines)
  3. disco_xform_utils.py (9 changed lines)

Disco_Diffusion.ipynb
@@ -296,25 +296,25 @@
 "`image_prompts` | Think of these images more as a description of their contents. | N/A\n",
 "**Image quality:**\n",
 "`clip_guidance_scale` | Controls how much the image should look like the prompt. | 1000\n",
-"`tv_scale` | Controls the smoothness of the final output. | 150\n",
-"`range_scale` | Controls how far out of range RGB values are allowed to be. | 150\n",
+"`tv_scale` | Controls the smoothness of the final output. | 150\n",
+"`range_scale` | Controls how far out of range RGB values are allowed to be. | 150\n",
 "`sat_scale` | Controls how much saturation is allowed. From nshepperd's JAX notebook. | 0\n",
 "`cutn` | Controls how many crops to take from the image. | 16\n",
-"`cutn_batches` | Accumulate CLIP gradient from multiple batches of cuts | 2\n",
+"`cutn_batches` | Accumulate CLIP gradient from multiple batches of cuts. | 2\n",
 "**Init settings:**\n",
-"`init_image` | URL or local path | None\n",
-"`init_scale` | This enhances the effect of the init image, a good value is 1000 | 0\n",
+"`init_image` | URL or local path | None\n",
+"`init_scale` | This enhances the effect of the init image, a good value is 1000 | 0\n",
 "`skip_steps` | Controls the starting point along the diffusion timesteps | 0\n",
-"`perlin_init` | Option to start with random perlin noise | False\n",
-"`perlin_mode` | ('gray', 'color') | 'mixed'\n",
+"`perlin_init` | Option to start with random perlin noise | False\n",
+"`perlin_mode` | ('gray', 'color') | 'mixed'\n",
 "**Advanced:**\n",
-"`skip_augs` |Controls whether to skip torchvision augmentations | False\n",
-"`randomize_class` |Controls whether the imagenet class is randomly changed each iteration | True\n",
-"`clip_denoised` |Determines whether CLIP discriminates a noisy or denoised image | False\n",
-"`clamp_grad` |Experimental: Using adaptive clip grad in the cond_fn | True\n",
+"`skip_augs` | Controls whether to skip torchvision augmentations | False\n",
+"`randomize_class` | Controls whether the imagenet class is randomly changed each iteration | True\n",
+"`clip_denoised` | Determines whether CLIP discriminates a noisy or denoised image | False\n",
+"`clamp_grad` | Experimental: Using adaptive clip grad in the cond_fn | True\n",
 "`seed` | Choose a random seed and print it at end of run for reproduction | random_seed\n",
 "`fuzzy_prompt` | Controls whether to add multiple noisy prompts to the prompt losses | False\n",
-"`rand_mag` |Controls the magnitude of the random noise | 0.1\n",
+"`rand_mag` | Controls the magnitude of the random noise | 0.1\n",
 "`eta` | DDIM hyperparameter | 0.5\n",
 "\n",
 "..\n",
@@ -325,10 +325,10 @@
 "Setting | Description | Default\n",
 "--- | --- | ---\n",
 "**Diffusion:**\n",
-"`timestep_respacing` | Modify this value to decrease the number of timesteps. | ddim100\n",
+"`timestep_respacing` | Modify this value to decrease the number of timesteps. | ddim100\n",
 "`diffusion_steps` || 1000\n",
 "**Diffusion:**\n",
-"`clip_models` | Models of CLIP to load. Typically the more, the better but they all come at a hefty VRAM cost. | ViT-B/32, ViT-B/16, RN50x4"
+"`clip_models` | Models of CLIP to load. Typically the more, the better but they all come at a hefty VRAM cost. | ViT-B/32, ViT-B/16, RN50x4"
 ]
 },
 {
@@ -1671,7 +1671,7 @@
 " alphas, sigmas = map(partial(append_dims, n=v.ndim), t_to_alpha_sigma(t))\n",
 " pred = input * alphas - v * sigmas\n",
 " eps = input * sigmas + v * alphas\n",
-" return DiffusionOutput(v, pred, eps)\n"
+" return DiffusionOutput(v, pred, eps)"
 ],
 "outputs": [],
 "execution_count": null
@@ -1894,7 +1894,7 @@
 "\n",
 "#Make folder for batch\n",
 "batchFolder = f'{outDirPath}/{batch_name}'\n",
-"createPath(batchFolder)\n"
+"createPath(batchFolder)"
 ],
 "outputs": [],
 "execution_count": null
@@ -2686,4 +2686,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
+}
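
Note on the notebook hunks: most of the churn above is whitespace-only (a trailing "\n" dropped from the last line of a few cells). The one code cell shown (around line 1671) returns a `DiffusionOutput(v, pred, eps)` built from the v-prediction parameterization: `pred = x*alpha - v*sigma` and `eps = x*sigma + v*alpha`. A minimal standalone sketch of that conversion, assuming the usual cosine schedule for `t_to_alpha_sigma` and a simple right-padding `append_dims` helper — both are assumptions here, not taken from this diff:

```python
import math
from functools import partial
from typing import NamedTuple

import torch

class DiffusionOutput(NamedTuple):
    v: torch.Tensor
    pred: torch.Tensor
    eps: torch.Tensor

def append_dims(x, n):
    # Right-pad x with singleton dims until it has n dims, so a per-sample
    # scalar broadcasts against an image batch of shape (B, C, H, W). (Assumed helper.)
    return x[(...,) + (None,) * (n - x.ndim)]

def t_to_alpha_sigma(t):
    # Cosine schedule commonly paired with v-prediction (assumed here).
    return torch.cos(t * math.pi / 2), torch.sin(t * math.pi / 2)

def v_to_output(input, v, t):
    # Convert a v-prediction into denoised-image and noise estimates.
    alphas, sigmas = map(partial(append_dims, n=v.ndim), t_to_alpha_sigma(t))
    pred = input * alphas - v * sigmas   # estimated clean image x0
    eps = input * sigmas + v * alphas    # estimated noise
    return DiffusionOutput(v, pred, eps)
```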

disco.py
@@ -281,25 +281,25 @@ Setting | Description | Default
 `image_prompts` | Think of these images more as a description of their contents. | N/A
 **Image quality:**
 `clip_guidance_scale` | Controls how much the image should look like the prompt. | 1000
-`tv_scale` | Controls the smoothness of the final output. | 150
-`range_scale` | Controls how far out of range RGB values are allowed to be. | 150
+`tv_scale` | Controls the smoothness of the final output. | 150
+`range_scale` | Controls how far out of range RGB values are allowed to be. | 150
 `sat_scale` | Controls how much saturation is allowed. From nshepperd's JAX notebook. | 0
 `cutn` | Controls how many crops to take from the image. | 16
-`cutn_batches` | Accumulate CLIP gradient from multiple batches of cuts | 2
+`cutn_batches` | Accumulate CLIP gradient from multiple batches of cuts. | 2
 **Init settings:**
-`init_image` | URL or local path | None
-`init_scale` | This enhances the effect of the init image, a good value is 1000 | 0
-`skip_steps Controls the starting point along the diffusion timesteps | 0
-`perlin_init` | Option to start with random perlin noise | False
-`perlin_mode` | ('gray', 'color') | 'mixed'
+`init_image` | URL or local path | None
+`init_scale` | This enhances the effect of the init image, a good value is 1000 | 0
+`skip_steps` | Controls the starting point along the diffusion timesteps | 0
+`perlin_init` | Option to start with random perlin noise | False
+`perlin_mode` | ('gray', 'color') | 'mixed'
 **Advanced:**
-`skip_augs` |Controls whether to skip torchvision augmentations | False
-`randomize_class` |Controls whether the imagenet class is randomly changed each iteration | True
-`clip_denoised` |Determines whether CLIP discriminates a noisy or denoised image | False
-`clamp_grad` |Experimental: Using adaptive clip grad in the cond_fn | True
+`skip_augs` | Controls whether to skip torchvision augmentations | False
+`randomize_class` | Controls whether the imagenet class is randomly changed each iteration | True
+`clip_denoised` | Determines whether CLIP discriminates a noisy or denoised image | False
+`clamp_grad` | Experimental: Using adaptive clip grad in the cond_fn | True
 `seed` | Choose a random seed and print it at end of run for reproduction | random_seed
 `fuzzy_prompt` | Controls whether to add multiple noisy prompts to the prompt losses | False
-`rand_mag` |Controls the magnitude of the random noise | 0.1
+`rand_mag` | Controls the magnitude of the random noise | 0.1
 `eta` | DDIM hyperparameter | 0.5
 ..
@@ -310,10 +310,10 @@ Setting | Description | Default
 Setting | Description | Default
 --- | --- | ---
 **Diffusion:**
-`timestep_respacing` | Modify this value to decrease the number of timesteps. | ddim100
+`timestep_respacing` | Modify this value to decrease the number of timesteps. | ddim100
 `diffusion_steps` || 1000
 **Diffusion:**
-`clip_models` | Models of CLIP to load. Typically the more, the better but they all come at a hefty VRAM cost. | ViT-B/32, ViT-B/16, RN50x4
+`clip_models` | Models of CLIP to load. Typically the more, the better but they all come at a hefty VRAM cost. | ViT-B/32, ViT-B/16, RN50x4
 """
 # %%
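
The disco.py hunks only touch the settings tables in the notebook's documentation string. Purely as an illustration of what those documented defaults look like when set for a run — the names and values come from the tables above; treating them as plain module-level assignments is an assumption about how the notebook consumes them:

```python
# Illustrative defaults taken from the settings tables above; whether the
# notebook reads these as plain globals is an assumption for this sketch.
clip_guidance_scale = 1000   # how strongly the image is pushed toward the prompt
tv_scale = 150               # smoothness of the final output
range_scale = 150            # penalty on out-of-range RGB values
sat_scale = 0                # penalty on oversaturation
cutn = 16                    # crops taken from the image per batch
cutn_batches = 2             # accumulate CLIP gradient over this many cut batches

init_image = None            # URL or local path
init_scale = 0               # raise (e.g. to 1000) to strengthen the init image
skip_steps = 0               # starting point along the diffusion timesteps
perlin_init = False          # start from random perlin noise
perlin_mode = 'mixed'        # 'gray', 'color', or 'mixed'

clamp_grad = True            # experimental adaptive gradient clipping in cond_fn
fuzzy_prompt = False         # add multiple noisy copies of the prompt to the losses
rand_mag = 0.1               # magnitude of the random noise used by fuzzy_prompt
eta = 0.5                    # DDIM hyperparameter
```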

disco_xform_utils.py
@@ -12,6 +12,7 @@ except:
     sys.exit()
 MAX_ADABINS_AREA = 500000
+MIN_ADABINS_AREA = 448*448
 @torch.no_grad()
 def transform_image_3d(img_filepath, midas_model, midas_transform, device, rot_mat=torch.eye(3).unsqueeze(0), translate=(0.,0.,-0.04), near=2000, far=20000, fov_deg=60, padding_mode='border', sampling_mode='bicubic', midas_weight = 0.3):
@@ -33,11 +34,17 @@ def transform_image_3d(img_filepath, midas_model, midas_transform, device, rot_m
     if image_pil_area > MAX_ADABINS_AREA:
         scale = math.sqrt(MAX_ADABINS_AREA) / math.sqrt(image_pil_area)
         depth_input = img_pil.resize((int(w*scale), int(h*scale)), Image.LANCZOS) # LANCZOS is supposed to be good for downsampling.
+    elif image_pil_area < MIN_ADABINS_AREA:
+        scale = math.sqrt(MIN_ADABINS_AREA) / math.sqrt(image_pil_area)
+        depth_input = img_pil.resize((int(w*scale), int(h*scale)), Image.BICUBIC)
     else:
         depth_input = img_pil
     try:
         _, adabins_depth = infer_helper.predict_pil(depth_input)
-        adabins_depth = torchvision.transforms.functional.resize(torch.from_numpy(adabins_depth), image_tensor.shape[-2:], interpolation=torchvision.transforms.functional.InterpolationMode.BICUBIC).squeeze().to(device)
+        if image_pil_area != MAX_ADABINS_AREA:
+            adabins_depth = torchvision.transforms.functional.resize(torch.from_numpy(adabins_depth), image_tensor.shape[-2:], interpolation=torchvision.transforms.functional.InterpolationMode.BICUBIC).squeeze().to(device)
+        else:
+            adabins_depth = torch.from_numpy(adabins_depth).squeeze().to(device)
         adabins_depth_np = adabins_depth.cpu().numpy()
     except:
         pass
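
The disco_xform_utils.py change is the substance of this PR: frames smaller than MIN_ADABINS_AREA (448*448) are now upscaled before AdaBins depth inference instead of being fed in at their native size, and the predicted depth is resized back to the frame's resolution. A standalone sketch of that resize policy, with a hypothetical `predict_depth` callable standing in for AdaBins' `infer_helper.predict_pil`; the thresholds and interpolation choices mirror the diff, everything else is illustrative:

```python
import math

import torch
import torchvision.transforms.functional as TF
from PIL import Image

MAX_ADABINS_AREA = 500000    # downscale above this many pixels (from the diff)
MIN_ADABINS_AREA = 448 * 448 # upscale below this many pixels (added by this PR)

def depth_for_frame(img_pil: Image.Image, predict_depth, device='cpu'):
    """Resize a frame into AdaBins' comfortable area range, run depth
    inference, and return depth at the frame's original resolution.

    `predict_depth` is a hypothetical callable: PIL image -> numpy depth map.
    """
    w, h = img_pil.size
    area = w * h
    if area > MAX_ADABINS_AREA:
        # Downscale; LANCZOS is a good filter for shrinking.
        scale = math.sqrt(MAX_ADABINS_AREA) / math.sqrt(area)
        depth_input = img_pil.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
    elif area < MIN_ADABINS_AREA:
        # New behaviour in this PR: upscale small frames before depth inference.
        scale = math.sqrt(MIN_ADABINS_AREA) / math.sqrt(area)
        depth_input = img_pil.resize((int(w * scale), int(h * scale)), Image.BICUBIC)
    else:
        depth_input = img_pil

    depth = torch.from_numpy(predict_depth(depth_input))
    if depth.ndim == 2:
        depth = depth[None]  # add a channel dim so TF.resize treats (H, W) as spatial
    if depth_input.size != img_pil.size:
        # Bring the depth map back to the frame's own (h, w).
        depth = TF.resize(depth, [h, w], interpolation=TF.InterpolationMode.BICUBIC)
    return depth.squeeze().to(device)
```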
