Commit 14534d3: v0.8
volotat committed May 11, 2023 (1 parent: c987f64)
Showing 7 changed files with 259 additions and 234 deletions.
14 changes: 7 additions & 7 deletions FloweR/model.py
@@ -18,43 +18,43 @@ def __init__(self, input_size = (384, 384), window_size = 4):
             nn.ReLU(),
         ) # 384 x 384 x 128
 
-        self.conv_block_2 = nn.Sequential(
+        self.conv_block_2 = nn.Sequential( # x128
             nn.AvgPool2d(2),
             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
             nn.ReLU(),
         ) # 192 x 192 x 128
 
-        self.conv_block_3 = nn.Sequential(
+        self.conv_block_3 = nn.Sequential( # x64
             nn.AvgPool2d(2),
             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
             nn.ReLU(),
         ) # 96 x 96 x 128
 
-        self.conv_block_4 = nn.Sequential(
+        self.conv_block_4 = nn.Sequential( # x32
            nn.AvgPool2d(2),
             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
             nn.ReLU(),
         ) # 48 x 48 x 128
 
-        self.conv_block_5 = nn.Sequential(
+        self.conv_block_5 = nn.Sequential( # x16
             nn.AvgPool2d(2),
             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
             nn.ReLU(),
         ) # 24 x 24 x 128
 
-        self.conv_block_6 = nn.Sequential(
+        self.conv_block_6 = nn.Sequential( # x8
             nn.AvgPool2d(2),
             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
             nn.ReLU(),
         ) # 12 x 12 x 128
 
-        self.conv_block_7 = nn.Sequential(
+        self.conv_block_7 = nn.Sequential( # x4
             nn.AvgPool2d(2),
             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
             nn.ReLU(),
         ) # 6 x 6 x 128
 
-        self.conv_block_8 = nn.Sequential(
+        self.conv_block_8 = nn.Sequential( # x2
             nn.AvgPool2d(2),
             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
             nn.ReLU(),
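Note on the new comments: every conv_block applies AvgPool2d(2), so the spatial side halves from block to block, and the trailing comments give the resulting H x W x C (384 x 384 down to 3 x 3 for the default input). The added inline annotations x128 ... x2 appear to mark each block's input scale relative to the 3 x 3 bottleneck (3 * 128 = 384, 3 * 64 = 192, and so on). A minimal sketch, assuming the default 384 x 384 input, that reproduces this shape arithmetic:

# Sketch (not part of the commit): verify the encoder shape progression
# that the size comments describe. Assumes the default 384x384 input.
import torch
import torch.nn as nn

blocks = [nn.Sequential(
    nn.AvgPool2d(2),
    nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
    nn.ReLU(),
) for _ in range(7)]  # stand-ins for conv_block_2 .. conv_block_8

x = torch.zeros(1, 128, 384, 384)  # output of conv_block_1
for i, block in enumerate(blocks, start=2):
    x = block(x)
    print(f'conv_block_{i} output: {x.shape[-2]} x {x.shape[-1]} x {x.shape[1]}')
# conv_block_2 output: 192 x 192 x 128
# ...
# conv_block_8 output: 3 x 3 x 128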
14 changes: 9 additions & 5 deletions readme.md
@@ -54,8 +54,12 @@ All examples you can see here are originally generated at 512x512 resolution usi
 ## Installing the extension
 To install the extension go to the 'Extensions' tab in [Automatic1111 web-ui](https://github.com/AUTOMATIC1111/stable-diffusion-webui), then go to the 'Install from URL' tab. In the 'URL for extension's git repository' field enter the path to this repository, i.e. 'https://github.com/volotat/SD-CN-Animation.git'. Leave the 'Local directory name' field empty, then press the 'Install' button. Restart the web-ui; a new 'SD-CN-Animation' tab should appear. All generated videos will be saved into the 'stable-diffusion-webui/outputs/sd-cn-animation' folder.
 
-## Last version changes: v0.7
-* Text to Video mode added to the extension
-* 'Generate' button is now automatically disabled while the video is generated
-* Added 'Interrupt' button that allows to stop video generation process
-* Now all necessary models are automatically downloaded. No need for manual preparation.
+## Last version changes: v0.8
+* Better error handling. Fixes an issue where errors might not appear in the console.
+* Fixed an issue with deprecated variables. This should resolve problems with running the extension on other webui forks.
+* Slight improvements to the vid2vid processing pipeline.
+* Video preview added to the UI. It becomes available at the end of processing.
+* Time elapsed/left indication added.
+* Fixed an issue with color drifting on some models.
+* Sampler type and sampling steps settings added to text2video mode.
+* Added automatic resizing before processing with the RAFT and FloweR models (see the sketch after this list).
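Regarding the last changelog item: RAFT and FloweR operate on fixed-stride feature grids, so frame dimensions are floored to a multiple of 16 before processing and the results are resized back afterwards (see the flow_utils.py changes below). A minimal sketch of that rounding, assuming OpenCV-style (width, height) sizes; the helper name is illustrative, not from the repo:

# Hypothetical helper mirroring the rounding this commit applies
# before the RAFT/FloweR passes.
import cv2
import numpy as np

def resize_to_multiple_of_16(frame):
    h, w = frame.shape[:2]
    return cv2.resize(frame, (w // 16 * 16, h // 16 * 16))

frame = np.zeros((385, 511, 3), np.uint8)      # awkward input size
print(resize_to_multiple_of_16(frame).shape)   # (384, 496, 3)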
64 changes: 27 additions & 37 deletions scripts/base_ui.py
@@ -28,6 +28,7 @@
 from modules.ui import setup_progressbar, create_sampler_and_steps_selection, ordered_ui_categories, create_output_panel
 
 from core import vid2vid, txt2vid, utils
+import traceback
 
 def V2VArgs():
     seed = -1
@@ -62,15 +63,17 @@ def setup_common_values(mode, d):
     with gr.Row(elem_id=f'{mode}_n_prompt_toprow'):
         n_prompt = gr.Textbox(label='Negative prompt', lines=3, interactive=True, elem_id=f"{mode}_n_prompt", value=d.n_prompt)
     with gr.Row():
-        #steps = gr.Slider(label='Steps', minimum=1, maximum=100, step=1, value=d.steps, interactive=True)
         cfg_scale = gr.Slider(label='CFG scale', minimum=1, maximum=100, step=1, value=d.cfg_scale, interactive=True)
     with gr.Row():
         seed = gr.Number(label='Seed (this parameter controls how the first frame looks and the color distribution of the consecutive frames, as they depend on the first one)', value=d.seed, interactive=True, precision=0)
     with gr.Row():
         processing_strength = gr.Slider(label="Processing strength", value=d.processing_strength, minimum=0, maximum=1, step=0.05, interactive=True)
         fix_frame_strength = gr.Slider(label="Fix frame strength", value=d.fix_frame_strength, minimum=0, maximum=1, step=0.05, interactive=True)
+    with gr.Row():
+        sampler_index = gr.Dropdown(label='Sampling method', elem_id=f"{mode}_sampling", choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index", interactive=True)
+        steps = gr.Slider(label="Sampling steps", minimum=1, maximum=150, step=1, elem_id=f"{mode}_steps", value=d.steps, interactive=True)
 
-    return width, height, prompt, n_prompt, cfg_scale, seed, processing_strength, fix_frame_strength
+    return width, height, prompt, n_prompt, cfg_scale, seed, processing_strength, fix_frame_strength, sampler_index, steps
 
 def inputs_ui():
     v2v_args = SimpleNamespace(**V2VArgs())
@@ -83,29 +86,17 @@ def inputs_ui():
             gr.HTML('Put your video here')
             with gr.Row():
                 v2v_file = gr.File(label="Input video", interactive=True, file_count="single", file_types=["video"], elem_id="vid_to_vid_chosen_file")
-                #init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", image_mode="RGBA")
-            #with gr.Row():
-            #    gr.HTML('Alternative: enter the relative (to the webui) path to the file')
-            #with gr.Row():
-            #    vid2vid_frames_path = gr.Textbox(label="Input video path", interactive=True, elem_id="vid_to_vid_chosen_path", placeholder='Enter your video path here, or upload in the box above ^')
 
-            v2v_width, v2v_height, v2v_prompt, v2v_n_prompt, v2v_cfg_scale, v2v_seed, v2v_processing_strength, v2v_fix_frame_strength = setup_common_values('vid2vid', v2v_args)
-
-            with FormRow(elem_id=f"sampler_selection_v2v"):
-                v2v_sampler_index = gr.Dropdown(label='Sampling method', elem_id=f"v2v_sampling", choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index")
-                v2v_steps = gr.Slider(minimum=1, maximum=150, step=1, elem_id=f"v2v_steps", label="Sampling steps", value=15)
+            v2v_width, v2v_height, v2v_prompt, v2v_n_prompt, v2v_cfg_scale, v2v_seed, v2v_processing_strength, v2v_fix_frame_strength, v2v_sampler_index, v2v_steps = setup_common_values('vid2vid', v2v_args)
 
             with FormRow(elem_id="vid2vid_override_settings_row") as row:
                 v2v_override_settings = create_override_settings_dropdown("vid2vid", row)
 
             with FormGroup(elem_id=f"script_container"):
                 v2v_custom_inputs = scripts.scripts_img2img.setup_ui()
-            #with gr.Row():
-            #    strength = gr.Slider(label="denoising strength", value=d.strength, minimum=0, maximum=1, step=0.05, interactive=True)
-            #    vid2vid_startFrame=gr.Number(label='vid2vid start frame',value=d.vid2vid_startFrame)
 
         with gr.Tab('txt2vid') as tab_txt2vid:
-            t2v_width, t2v_height, t2v_prompt, t2v_n_prompt, t2v_cfg_scale, t2v_seed, t2v_processing_strength, t2v_fix_frame_strength = setup_common_values('txt2vid', t2v_args)
+            t2v_width, t2v_height, t2v_prompt, t2v_n_prompt, t2v_cfg_scale, t2v_seed, t2v_processing_strength, t2v_fix_frame_strength, t2v_sampler_index, t2v_steps = setup_common_values('txt2vid', t2v_args)
             with gr.Row():
                 t2v_length = gr.Slider(label='Length (in frames)', minimum=10, maximum=2048, step=10, value=40, interactive=True)
                 t2v_fps = gr.Slider(label='Video FPS', minimum=4, maximum=64, step=4, value=12, interactive=True)
@@ -117,12 +108,22 @@ def inputs_ui():
     return locals()
 
 def process(*args):
-    if args[0] == 'vid2vid':
-        yield from vid2vid.start_process(*args)
-    elif args[0] == 'txt2vid':
-        yield from txt2vid.start_process(*args)
-    else:
-        raise Exception(f"Unsupported processing mode: '{args[0]}'")
+    msg = 'Done'
+    try:
+        if args[0] == 'vid2vid':
+            yield from vid2vid.start_process(*args)
+        elif args[0] == 'txt2vid':
+            yield from txt2vid.start_process(*args)
+        else:
+            msg = f"Unsupported processing mode: '{args[0]}'"
+            raise Exception(msg)
+    except Exception as error:
+        # report the error without crashing the UI
+        msg = f"An exception occurred while trying to process the frame: {error}"
+        print(msg)
+        traceback.print_exc()
+
+    yield msg, gr.Image.update(), gr.Image.update(), gr.Image.update(), gr.Image.update(), gr.Video.update(), gr.Button.update(interactive=True), gr.Button.update(interactive=False)
 
 def stop_process(*args):
     utils.shared.is_interrupted = True
@@ -140,18 +141,6 @@ def on_ui_tabs():
         with gr.Column(scale=1, variant='panel'):
             with gr.Tabs():
                 components = inputs_ui()
-
-        #for category in ordered_ui_categories():
-        #    if category == "sampler":
-        #        steps, sampler_index = create_sampler_and_steps_selection(samplers_for_img2img, "vid2vid")
-
-        #    elif category == "override_settings":
-        #        with FormRow(elem_id="vid2vid_override_settings_row") as row:
-        #            override_settings = create_override_settings_dropdown("vid2vid", row)
-
-        #    elif category == "scripts":
-        #        with FormGroup(elem_id=f"script_container"):
-        #            custom_inputs = scripts.scripts_img2img.setup_ui()
 
         with gr.Column(scale=1, variant='compact'):
             with gr.Row(variant='compact'):
@@ -172,7 +161,8 @@ def on_ui_tabs():
                 img_preview_prev_warp = gr.Image(label='Previous frame warped', elem_id=f"img_preview_curr_frame", type='pil').style(height=240)
                 img_preview_processed = gr.Image(label='Processed', elem_id=f"img_preview_processed", type='pil').style(height=240)
 
-            html_log = gr.HTML(elem_id=f'html_log_vid2vid')
+            # html_log = gr.HTML(elem_id=f'html_log_vid2vid')
+            video_preview = gr.Video(interactive=False)
 
             with gr.Row(variant='compact'):
                 dummy_component = gr.Label(visible=False)
@@ -186,9 +176,9 @@ def on_ui_tabs():
         img_preview_curr_occl,
         img_preview_prev_warp,
         img_preview_processed,
-        html_log,
+        video_preview,
         run_button,
-        stop_button
+        stop_button,
     ]
 
     run_button.click(
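Note how process() works: it is a generator wired to run_button.click, so every yield streams a fresh set of component updates to the UI, and with the new try/except the final yield always fires, reporting the status message, updating the new video_preview slot, and re-enabling the Run button even after a failure. A self-contained sketch of the same pattern, assuming Gradio 3.x (where gr.Button.update exists); the component names are illustrative, not from the extension:

# Minimal sketch of the generator-callback pattern used above (assumes
# Gradio 3.x). The final yield sits outside the try block so the button
# is restored whether or not an exception was caught.
import traceback
import gradio as gr

def process(mode):
    msg = 'Done'
    try:
        for i in range(3):
            yield f'{mode}: step {i}', gr.Button.update(interactive=False)
    except Exception as error:
        msg = f'An exception occurred: {error}'
        traceback.print_exc()
    yield msg, gr.Button.update(interactive=True)  # always re-enable Run

with gr.Blocks() as demo:
    status = gr.Textbox(label='Status')
    run_button = gr.Button('Run')
    run_button.click(process, inputs=[status], outputs=[status, run_button])

# demo.queue().launch()  # generator callbacks require the queue in Gradio 3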
19 changes: 12 additions & 7 deletions scripts/core/flow_utils.py
@@ -43,6 +43,11 @@ def RAFT_clear_memory():
 def RAFT_estimate_flow(frame1, frame2, device='cuda', subtract_background=True):
     global RAFT_model
 
+    org_size = frame1.shape[1], frame1.shape[0]
+    size = frame1.shape[1] // 16 * 16, frame1.shape[0] // 16 * 16
+    frame1 = cv2.resize(frame1, size)
+    frame2 = cv2.resize(frame2, size)
+
     model_path = ph.models_path + '/RAFT/raft-things.pth'
     remote_model_path = 'https://drive.google.com/uc?id=1MqDajR89k-xLV0HIrmJ0k-n8ZpG6_suM'

@@ -67,9 +72,9 @@ def RAFT_estimate_flow(frame1, frame2, device='cuda', subtract_background=True):
         RAFT_model.to(device)
         RAFT_model.eval()
 
-    if subtract_background:
-        frame1 = background_subtractor(frame1, fgbg)
-        frame2 = background_subtractor(frame2, fgbg)
+    #if subtract_background:
+    #    frame1 = background_subtractor(frame1, fgbg)
+    #    frame2 = background_subtractor(frame2, fgbg)
 
     with torch.no_grad():
         frame1_torch = torch.from_numpy(frame1).permute(2, 0, 1).float()[None].to(device)
@@ -90,10 +95,10 @@ def RAFT_estimate_flow(frame1, frame2, device='cuda', subtract_background=True):

occlusion_mask = fb_norm[..., None].repeat(3, axis=-1)

return next_flow, prev_flow, occlusion_mask, frame1, frame2

# ... rest of the file ...
next_flow = cv2.resize(next_flow, org_size)
prev_flow = cv2.resize(prev_flow, org_size)

return next_flow, prev_flow, occlusion_mask #, frame1, frame2

def compute_diff_map(next_flow, prev_flow, prev_frame, cur_frame, prev_frame_styled):
h, w = cur_frame.shape[:2]
@@ -144,7 +149,7 @@ def compute_diff_map(next_flow, prev_flow, prev_frame, cur_frame, prev_frame_styled):
     #diff_mask_stl = np.abs(warped_frame_styled.astype(np.float32) - cur_frame.astype(np.float32)) / 255
     #diff_mask_stl = diff_mask_stl.max(axis = -1, keepdims=True)
 
-    alpha_mask = np.maximum(occlusion_mask * 0.3, diff_mask_org * 3) #, diff_mask_stl * 2
+    alpha_mask = np.maximum(occlusion_mask * 0.3, diff_mask_org * 4) #, diff_mask_stl * 2
     alpha_mask = alpha_mask.repeat(3, axis = -1)
 
     #alpha_mask_blured = cv2.dilate(alpha_mask, np.ones((5, 5), np.float32))
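One subtlety in the updated RAFT_estimate_flow: frames are shrunk to a multiple of 16 before flow estimation and the resulting flow fields are resized back to the original resolution with a plain cv2.resize. Strictly speaking, resizing a flow field should also rescale the flow vectors by the size ratio; since the rounding changes each dimension by at most 15 pixels the ratio stays close to 1, which is presumably why the commit skips that step. A sketch of the stricter version, with an illustrative helper that is not in the repo:

# Illustrative helper (not in the repo): resize a dense optical-flow field
# and rescale its vectors so displacements stay valid in the new pixel grid.
# Assumes a float H x W x 2 array of (dx, dy) and an OpenCV (width, height) size.
import cv2
import numpy as np

def resize_flow(flow, new_size):
    h, w = flow.shape[:2]
    new_w, new_h = new_size
    flow = cv2.resize(flow, new_size)
    flow[..., 0] *= new_w / w   # x displacements scale with width
    flow[..., 1] *= new_h / h   # y displacements scale with height
    return flow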