@ -16,7 +16,7 @@
"id": "TitleTop"
},
"source": [
"# Disco Diffusion v5 - Now with 3D animation \n",
"# Disco Diffusion v5.1 - Now with Turbo \n",
"\n",
"In case of confusion, Disco is the name of this notebook edit. The diffusion model in use is Katherine Crowson's fine-tuned 512x512 model\n",
"\n",
@ -58,7 +58,9 @@
"\n",
"Somnai (https://twitter.com/Somnai_dreams) added Diffusion Animation techniques, QoL improvements and various implementations of tech and techniques, mostly listed in the changelog below.\n",
"\n",
"3D animation implementation added by Adam Letts (https://twitter.com/gandamu_ml) in collaboration with Somnai."
"3D animation implementation added by Adam Letts (https://twitter.com/gandamu_ml) in collaboration with Somnai.\n",
"\n",
"Turbo feature by Chris Allen (https://twitter.com/zippy731)"
]
},
{
@ -235,10 +237,22 @@
"\n",
" Separated transform code\n",
"\n",
" v5.01 Update: Match 10th 2022 - gandamu / Adam Letts\n",
" v5.01 Update: Mar 10th 2022 - gandamu / Adam Letts\n",
"\n",
" IPython magic commands replaced by Python code\n",
"\n",
" v5.1 Update: Mar 30th 2022 - zippy / Chris Allen and gandamu / Adam Letts\n",
"\n",
" Integrated Turbo+Smooth features from Disco Diffusion Turbo -- just the implementation, without its defaults.\n",
"\n",
" Implemented resume of turbo animations in such a way that it's now possible to resume from different batch folders and batch numbers.\n",
"\n",
" 3D rotation parameter units are now degrees (rather than radians)\n",
"\n",
" Corrected name collision in sampling_mode (now diffusion_sampling_mode for plms/ddim, and sampling_mode for 3D transform sampling)\n",
"\n",
" Added video_init_seed_continuity option to make init video animations more continuous\n",
"\n",
" '''\n",
" )\n"
],
@ -392,14 +406,8 @@
" root_path = '.'\n",
"\n",
"import os\n",
"from os import path\n",
"#Simple create paths taken with modifications from Datamosh's Batch VQGAN+CLIP notebook\n",
"def createPath(filepath):\n",
"  '''Create the directory `filepath` (and any missing parents); do nothing if it already exists.'''\n",
"  # exist_ok=True makes this idempotent on its own, so no separate existence\n",
"  # check is needed (a check-then-create sequence can also race with another\n",
"  # process creating the same folder).\n",
"  os.makedirs(filepath, exist_ok=True)\n",
"\n",
"initDirPath = f'{root_path}/init_images'\n",
"createPath(initDirPath)\n",
@ -432,7 +440,6 @@
"source": [
"#@title ### 1.3 Install and import dependencies\n",
"\n",
"from os.path import exists as path_exists\n",
"import pathlib, shutil\n",
"\n",
"if not is_colab:\n",
@ -476,9 +483,9 @@
" except:\n",
" pass\n",
"\n",
"if not path_ exists(f'{model_path}'):\n",
"if not os.path. exists(f'{model_path}'):\n",
" pathlib.Path(model_path).mkdir(parents=True, exist_ok=True)\n",
"if not path_ exists(f'{model_path}/dpt_large-midas-2f21e586.pt'):\n",
"if not os.path. exists(f'{model_path}/dpt_large-midas-2f21e586.pt'):\n",
" wget(\"https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt\", model_path)\n",
"\n",
"import sys\n",
@ -575,7 +582,7 @@
"if USE_ADABINS:\n",
" if is_colab:\n",
" gitclone(\"https://github.com/shariqfarooq123/AdaBins.git\")\n",
" if not path_ exists(f'{model_path}/AdaBins_nyu.pt'):\n",
" if not os.path. exists(f'{model_path}/AdaBins_nyu.pt'):\n",
" wget(\"https://cloudflare-ipfs.com/ipfs/Qmd2mMnDLWePKmgfS8m6ntAg4nhV5VkUyAydYBp8cWWeB7/AdaBins_nyu.pt\", model_path)\n",
" pathlib.Path(\"pretrained\").mkdir(parents=True, exist_ok=True)\n",
" shutil.copyfile(f\"{model_path}/AdaBins_nyu.pt\", \"pretrained/AdaBins_nyu.pt\")\n",
@ -1003,6 +1010,37 @@
"\n",
"stop_on_next_loop = False # Make sure GPU memory doesn't get corrupted from cancelling the run mid-way through, allow a full frame to complete\n",
"\n",
"def do_3d_step(img_filepath, frame_num, midas_model, midas_transform):\n",
"  '''Apply the 3D camera motion configured for `frame_num` to one image.\n",
"\n",
"  Reads the translation and rotation values for this frame from `args`, builds a\n",
"  rotation matrix from the rotation values (given in degrees, converted to\n",
"  radians here) and hands everything to `dxf.transform_image_3d`.\n",
"\n",
"  Args:\n",
"    img_filepath: path of the image passed to `dxf.transform_image_3d`.\n",
"    frame_num: index used to look up the per-frame `*_series` values on `args`.\n",
"    midas_model: forwarded unchanged to `dxf.transform_image_3d`.\n",
"    midas_transform: forwarded unchanged to `dxf.transform_image_3d`.\n",
"\n",
"  Returns:\n",
"    The result of `dxf.transform_image_3d`; callers call `.save()` on it.\n",
"  '''\n",
"  if args.key_frames:\n",
"    translation_x = args.translation_x_series[frame_num]\n",
"    translation_y = args.translation_y_series[frame_num]\n",
"    translation_z = args.translation_z_series[frame_num]\n",
"    rotation_3d_x = args.rotation_3d_x_series[frame_num]\n",
"    rotation_3d_y = args.rotation_3d_y_series[frame_num]\n",
"    rotation_3d_z = args.rotation_3d_z_series[frame_num]\n",
"    print(\n",
"      f'translation_x: {translation_x}',\n",
"      f'translation_y: {translation_y}',\n",
"      f'translation_z: {translation_z}',\n",
"      f'rotation_3d_x: {rotation_3d_x}',\n",
"      f'rotation_3d_y: {rotation_3d_y}',\n",
"      f'rotation_3d_z: {rotation_3d_z}',\n",
"    )\n",
"  else:\n",
"    # These six names are assigned in the branch above, which makes them local to\n",
"    # this function. Without this branch, running with key_frames disabled would\n",
"    # raise UnboundLocalError further down instead of picking up any global.\n",
"    # NOTE(review): assumes args also carries the scalar (non-series) settings\n",
"    # under the un-suffixed names -- confirm against the args dict.\n",
"    translation_x = args.translation_x\n",
"    translation_y = args.translation_y\n",
"    translation_z = args.translation_z\n",
"    rotation_3d_x = args.rotation_3d_x\n",
"    rotation_3d_y = args.rotation_3d_y\n",
"    rotation_3d_z = args.rotation_3d_z\n",
"\n",
"  trans_scale = 1.0/200.0\n",
"  translate_xyz = [-translation_x*trans_scale, translation_y*trans_scale, -translation_z*trans_scale]\n",
"  rotate_xyz_degrees = [rotation_3d_x, rotation_3d_y, rotation_3d_z]\n",
"  print('translation:',translate_xyz)\n",
"  print('rotation:',rotate_xyz_degrees)\n",
"  # The rotation settings are in degrees; convert before building the matrix.\n",
"  rotate_xyz = [math.radians(degrees) for degrees in rotate_xyz_degrees]\n",
"  rot_mat = p3dT.euler_angles_to_matrix(torch.tensor(rotate_xyz, device=device), \"XYZ\").unsqueeze(0)\n",
"  print(\"rot_mat: \" + str(rot_mat))\n",
"  next_step_pil = dxf.transform_image_3d(img_filepath, midas_model, midas_transform, DEVICE,\n",
"                                         rot_mat, translate_xyz, args.near_plane, args.far_plane,\n",
"                                         args.fov, padding_mode=args.padding_mode,\n",
"                                         sampling_mode=args.sampling_mode, midas_weight=args.midas_weight)\n",
"  return next_step_pil\n",
"\n",
"def do_run():\n",
" seed = args.seed\n",
" print(range(args.start_frame, args.max_frames))\n",
@ -1045,7 +1083,7 @@
" )\n",
" \n",
" if frame_num > 0:\n",
" seed = seed + 1 \n",
" seed + = 1\n",
" if resume_run and frame_num == start_frame:\n",
" img_0 = cv2.imread(batchFolder+f\"/{batch_name}({batchNum})_{start_frame-1:04}.png\")\n",
" else:\n",
@ -1072,50 +1110,55 @@
" skip_steps = args.calc_frames_skip_steps\n",
"\n",
" if args.animation_mode == \"3D\":\n",
" if args.key_frames:\n",
" angle = args.angle_series[frame_num]\n",
" #zoom = args.zoom_series[frame_num]\n",
" translation_x = args.translation_x_series[frame_num]\n",
" translation_y = args.translation_y_series[frame_num]\n",
" translation_z = args.translation_z_series[frame_num]\n",
" rotation_3d_x = args.rotation_3d_x_series[frame_num]\n",
" rotation_3d_y = args.rotation_3d_y_series[frame_num]\n",
" rotation_3d_z = args.rotation_3d_z_series[frame_num]\n",
" print(\n",
" f'angle: {angle}',\n",
" #f'zoom: {zoom}',\n",
" f'translation_x: {translation_x}',\n",
" f'translation_y: {translation_y}',\n",
" f'translation_z: {translation_z}',\n",
" f'rotation_3d_x: {rotation_3d_x}',\n",
" f'rotation_3d_y: {rotation_3d_y}',\n",
" f'rotation_3d_z: {rotation_3d_z}',\n",
" )\n",
"\n",
" if frame_num > 0:\n",
" seed = seed + 1 \n",
" if frame_num == 0:\n",
" turbo_blend = False\n",
" else:\n",
" seed += 1 \n",
" if resume_run and frame_num == start_frame:\n",
" img_filepath = batchFolder+f\"/{batch_name}({batchNum})_{start_frame-1:04}.png\"\n",
" if turbo_mode and frame_num > turbo_preroll:\n",
" shutil.copyfile(img_filepath, 'oldFrameScaled.png')\n",
" else:\n",
" img_filepath = '/content/prevFrame.png' if is_colab else 'prevFrame.png'\n",
" trans_scale = 1.0/200.0\n",
" translate_xyz = [-translation_x*trans_scale, translation_y*trans_scale, -translation_z*trans_scale]\n",
" rotate_xyz = [rotation_3d_x, rotation_3d_y, rotation_3d_z]\n",
" print('translation:',translate_xyz)\n",
" print('rotation:',rotate_xyz)\n",
" rot_mat = p3dT.euler_angles_to_matrix(torch.tensor(rotate_xyz, device=device), \"XYZ\").unsqueeze(0)\n",
" print(\"rot_mat: \" + str(rot_mat))\n",
" next_step_pil = dxf.transform_image_3d(img_filepath, midas_model, midas_transform, DEVICE,\n",
" rot_mat, translate_xyz, args.near_plane, args.far_plane,\n",
" args.fov, padding_mode=args.padding_mode,\n",
" sampling_mode=args.sampling_mode, midas_weight=args.midas_weight)\n",
"\n",
" next_step_pil = do_3d_step(img_filepath, frame_num, midas_model, midas_transform)\n",
" next_step_pil.save('prevFrameScaled.png')\n",
"\n",
" ### Turbo mode - skip some diffusions, use 3d morph for clarity and to save time\n",
" turbo_blend = False # default for non-turbo frame saving\n",
" if turbo_mode:\n",
" if frame_num == turbo_preroll: #start tracking oldframe\n",
" next_step_pil.save('oldFrameScaled.png')#stash for later blending \n",
" elif frame_num > turbo_preroll:\n",
" #set up 2 warped image sequences, old & new, to blend toward new diff image\n",
" old_frame = do_3d_step('oldFrameScaled.png', frame_num, midas_model, midas_transform)\n",
" old_frame.save('oldFrameScaled.png')\n",
" if frame_num % int(turbo_steps) != 0: \n",
" print('turbo skip this frame: skipping clip diffusion steps')\n",
" filename = f'{args.batch_name}({args.batchNum})_{frame_num:04}.png'\n",
" blend_factor = ((frame_num % int(turbo_steps))+1)/int(turbo_steps)\n",
" print('turbo skip this frame: skipping clip diffusion steps and saving blended frame')\n",
" newWarpedImg = cv2.imread('prevFrameScaled.png')#this is already updated..\n",
" oldWarpedImg = cv2.imread('oldFrameScaled.png')\n",
" blendedImage = cv2.addWeighted(newWarpedImg, blend_factor, oldWarpedImg,1-blend_factor, 0.0)\n",
" cv2.imwrite(f'{batchFolder}/{filename}',blendedImage)\n",
" next_step_pil.save(f'{img_filepath}') # save it also as prev_frame to feed next iteration\n",
" turbo_blend = False\n",
" continue\n",
" else:\n",
" #if not a skip frame, will run diffusion and need to blend.\n",
" oldWarpedImg = cv2.imread('prevFrameScaled.png')\n",
" cv2.imwrite(f'oldFrameScaled.png',oldWarpedImg)#swap in for blending later \n",
" turbo_blend = True # flag to blend frames after diff generated...\n",
" print('clip/diff this frame - generate clip diff image')\n",
"\n",
" init_image = 'prevFrameScaled.png'\n",
" init_scale = args.frames_scale\n",
" skip_steps = args.calc_frames_skip_steps\n",
"\n",
" if args.animation_mode == \"Video Input\":\n",
" seed = seed + 1 \n",
" if not video_init_seed_continuity:\n",
" seed += 1\n",
" init_image = f'{videoFramesFolder}/{frame_num+1:04}.jpg'\n",
" init_scale = args.frames_scale\n",
" skip_steps = args.calc_frames_skip_steps\n",
@ -1146,7 +1189,7 @@
" else:\n",
" image_prompt = []\n",
"\n",
" print(f'Frame Prompt: {frame_prompt}')\n",
" print(f'Frame {frame_num} Prompt: {frame_prompt}')\n",
"\n",
" model_stats = []\n",
" for clip_model in clip_models:\n",
@ -1273,7 +1316,7 @@
" return grad * magnitude.clamp(max=args.clamp_max) / magnitude #min=-0.02, min=-clamp_max, \n",
" return grad\n",
" \n",
" if args.sampling_mode == 'ddim':\n",
" if args.diffusion_ sampling_mode == 'ddim':\n",
" sample_fn = diffusion.ddim_sample_loop_progressive\n",
" else:\n",
" sample_fn = diffusion.plms_sample_loop_progressive\n",
@ -1296,7 +1339,7 @@
" if perlin_init:\n",
" init = regen_perlin()\n",
"\n",
" if args.sampling_mode == 'ddim':\n",
" if args.diffusion_ sampling_mode == 'ddim':\n",
" samples = sample_fn(\n",
" model,\n",
" (batch_size, 3, args.side_y, args.side_x),\n",
@ -1381,6 +1424,17 @@
" image.save(f'{unsharpenFolder}/{filename}')\n",
" else:\n",
" image.save(f'{batchFolder}/{filename}')\n",
" if args.animation_mode == \"3D\":\n",
" # If turbo_blend, save a blended image\n",
" if turbo_mode and turbo_blend:\n",
" # Mix new image with prevFrameScaled\n",
" newFrame = cv2.imread('prevFrame.png') # This is already updated..\n",
" prev_frame_warped = cv2.imread('prevFrameScaled.png')\n",
" blendedImage = cv2.addWeighted(newFrame, 0.5, prev_frame_warped, 0.5, 0.0)\n",
" cv2.imwrite(f'{batchFolder}/{filename}',blendedImage)\n",
" turbo_blend = False # reset to false\n",
" else:\n",
" image.save(f'{batchFolder}/{filename}')\n",
" # if frame_num != args.max_frames-1:\n",
" # display.clear_output()\n",
"\n",
@ -1428,7 +1482,7 @@
" 'use_secondary_model': use_secondary_model,\n",
" 'steps': steps,\n",
" 'diffusion_steps': diffusion_steps,\n",
" 'sampling_mode': sampling_mode,\n",
" 'diffusion_ sampling_mode': diffusion_ sampling_mode,\n",
" 'ViTB32': ViTB32,\n",
" 'ViTB16': ViTB16,\n",
" 'ViTL14': ViTL14,\n",
@ -1460,6 +1514,11 @@
" 'sampling_mode': sampling_mode,\n",
" 'video_init_path':video_init_path,\n",
" 'extract_nth_frame':extract_nth_frame,\n",
" 'video_init_seed_continuity': video_init_seed_continuity,\n",
" 'turbo_mode':turbo_mode,\n",
" 'turbo_steps':turbo_steps,\n",
" 'turbo_preroll':turbo_preroll,\n",
" 'turbo_frame_blend':turbo_frame_blend,\n",
" }\n",
" # print('Settings:', setting_list)\n",
" with open(f\"{batchFolder}/{batch_name}({batchNum})_settings.txt\", \"w+\") as f: #save settings\n",
@ -2193,7 +2252,7 @@
"#@markdown ####**Models Settings:**\n",
"diffusion_model = \"512x512_diffusion_uncond_finetune_008100\" #@param [\"256x256_diffusion_uncond\", \"512x512_diffusion_uncond_finetune_008100\"]\n",
"use_secondary_model = True #@param {type: 'boolean'}\n",
"sampling_mode = 'ddim' #@param ['plms','ddim'] \n",
"diffusion_ sampling_mode = 'ddim' #@param ['plms','ddim'] \n",
"\n",
"timestep_respacing = '250' #@param ['25','50','100','150','250','500','1000','ddim25','ddim50', 'ddim75', 'ddim100','ddim150','ddim250','ddim500','ddim1000'] \n",
"diffusion_steps = 1000 #@param {type: 'number'}\n",
@ -2457,7 +2516,8 @@
" video_init_path = \"/content/training.mp4\" #@param {type: 'string'}\n",
"else:\n",
" video_init_path = \"training.mp4\" #@param {type: 'string'}\n",
"extract_nth_frame = 2 #@param {type:\"number\"} \n",
"extract_nth_frame = 2 #@param {type: 'number'}\n",
"video_init_seed_continuity = True #@param {type: 'boolean'}\n",
"\n",
"if animation_mode == \"Video Input\":\n",
" if is_colab:\n",
@ -2480,6 +2540,7 @@
"\n",
"#@markdown ####**2D Animation Settings:**\n",
"#@markdown `zoom` is a multiplier of dimensions, 1 is no zoom.\n",
"#@markdown All rotations are provided in degrees.\n",
"\n",
"key_frames = True #@param {type:\"boolean\"}\n",
"max_frames = 10000#@param {type:\"number\"}\n",
@ -2504,6 +2565,25 @@
"padding_mode = 'border'#@param {type:\"string\"}\n",
"sampling_mode = 'bicubic'#@param {type:\"string\"}\n",
"\n",
"#======= TURBO MODE\n",
"#@markdown ---\n",
"#@markdown ####**Turbo Mode (3D anim only):**\n",
"#@markdown Turbo starts after frame 10 (`turbo_preroll`). On skipped frames no diffusion is run; the image is only warped using the depth map.\n",
"#@markdown Speeds up rendering by 2x-4x, and may improve image coherence between frames. `turbo_frame_blend` smooths abrupt texture changes across 2 frames.\n",
"#@markdown For different settings tuned for Turbo Mode, refer to the original Disco-Turbo Github: https://github.com/zippy731/disco-diffusion-turbo\n",
"\n",
"turbo_mode = False #@param {type:\"boolean\"}\n",
"turbo_steps = \"3\" #@param [\"2\",\"3\",\"4\",\"5\",\"6\"] {type:\"string\"}\n",
"turbo_preroll = 10 # frames\n",
"turbo_frame_blend = True #@param {type:\"boolean\"}\n",
"\n",
"#insist turbo be used only w 3d anim.\n",
"if turbo_mode and animation_mode != '3D':\n",
" print('=====')\n",
" print('Turbo mode only available with 3D animations. Disabling Turbo.')\n",
" print('=====')\n",
" turbo_mode = False\n",
"\n",
"#@markdown ---\n",
"\n",
"#@markdown ####**Coherency Settings:**\n",
@ -2939,8 +3019,12 @@
" batchNum = int(run_to_resume)\n",
"  if resume_from_frame == 'latest':\n",
"    start_frame = len(glob(batchFolder+f\"/{batch_name}({batchNum})_*.png\"))\n",
"    # Turbo is switched off for every animation mode except 3D, so this guard must\n",
"    # test for 3D; written with `!=` it could never be true. When resuming past the\n",
"    # preroll, rewind to a frame number that is a multiple of turbo_steps (the\n",
"    # frames on which the turbo branch of do_run runs diffusion).\n",
"    if animation_mode == '3D' and turbo_mode == True and start_frame > turbo_preroll and start_frame % int(turbo_steps) != 0:\n",
"      start_frame = start_frame - (start_frame % int(turbo_steps))\n",
"  else:\n",
"    start_frame = int(resume_from_frame)+1\n",
"    # Same turbo alignment as in the 'latest' branch above.\n",
"    if animation_mode == '3D' and turbo_mode == True and start_frame > turbo_preroll and start_frame % int(turbo_steps) != 0:\n",
"      start_frame = start_frame - (start_frame % int(turbo_steps))\n",
" if retain_overwritten_frames is True:\n",
" existing_frames = len(glob(batchFolder+f\"/{batch_name}({batchNum})_*.png\"))\n",
" frames_to_save = existing_frames - start_frame\n",
@ -2949,7 +3033,7 @@
"else:\n",
" start_frame = 0\n",
" batchNum = len(glob(batchFolder+\"/*.txt\"))\n",
" while path.isfile(f\"{batchFolder}/{batch_name}({batchNum})_settings.txt\") is True or path.isfile(f\"{batchFolder}/{batch_name}-{batchNum}_settings.txt\") is True:\n",
" while os. path.isfile(f\"{batchFolder}/{batch_name}({batchNum})_settings.txt\") is True or os. path.isfile(f\"{batchFolder}/{batch_name}-{batchNum}_settings.txt\") is True:\n",
" batchNum += 1\n",
"\n",
"print(f'Starting Run: {batch_name}({batchNum}) at frame {start_frame}')\n",
@ -2971,7 +3055,7 @@
" 'batch_size':batch_size,\n",
" 'batch_name': batch_name,\n",
" 'steps': steps,\n",
" 'sampling_mode': sampling_mode,\n",
" 'diffusion_ sampling_mode': diffusion_ sampling_mode,\n",
" 'width_height': width_height,\n",
" 'clip_guidance_scale': clip_guidance_scale,\n",
" 'tv_scale': tv_scale,\n",
@ -2990,6 +3074,7 @@
" 'animation_mode': animation_mode,\n",
" 'video_init_path': video_init_path,\n",
" 'extract_nth_frame': extract_nth_frame,\n",
" 'video_init_seed_continuity': video_init_seed_continuity,\n",
" 'key_frames': key_frames,\n",
" 'max_frames': max_frames if animation_mode != \"None\" else 1,\n",
" 'interp_spline': interp_spline,\n",
@ -3180,7 +3265,7 @@
"ExtraSetTop"
],
"machine_shape": "hm",
"name": "Disco Diffusion v5 [w/ 3D animation ]",
"name": "Disco Diffusion v5.1 [w/ Turbo ]",
"private_outputs": true,
"provenance": [],
"include_colab_link": true