Add CFG End Optimization (carson-katri#617)

* Add execution time below generate
* Remove placeholder
* Use cfg_end in prompt_to_image
* Use fewer decimals
* Use first embeddings instead of zero
* Extend to other pipelines
* Add min/max to cfg_end
* Update panel UI
* Simplify implementation
* Update for batching

  Co-authored-by: NullSenseStudio <[email protected]>

* Update for ControlNet + inpaint

---------

Co-authored-by: NullSenseStudio <[email protected]>
phi-line · Apr 25, 2023 · e1e998c · e1e998c
1 parent 8bf0954
commit e1e998c
Show file tree

Hide file tree

Showing 9 changed files with 56 additions and 6 deletions.
diff --git a/__init__.py b/__init__.py
@@ -88,6 +88,7 @@ def get_selection_preview(self):
         bpy.types.Scene.dream_textures_history_selection_preview = bpy.props.StringProperty(name="", default="", get=get_selection_preview, set=lambda _, __: None)
         bpy.types.Scene.dream_textures_progress = bpy.props.IntProperty(name="", default=0, min=0, max=0)
         bpy.types.Scene.dream_textures_info = bpy.props.StringProperty(name="Info")
+        bpy.types.Scene.dream_textures_last_execution_time = bpy.props.StringProperty(name="Last Execution Time", default="")
 
         bpy.types.Scene.dream_textures_viewport_enabled = BoolProperty(name="Viewport Enabled", default=False)
         bpy.types.Scene.dream_textures_render_properties_enabled = BoolProperty(default=False)

diff --git a/generator_process/actions/control_net.py b/generator_process/actions/control_net.py
@@ -353,6 +353,14 @@ def __call__(
                     num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
                     with self.progress_bar(total=num_inference_steps) as progress_bar:
                         for i, t in enumerate(timesteps):
+                            # NOTE: Modified to support disabling CFG
+                            if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']:
+                                do_classifier_free_guidance = False
+                                prompt_embeds = prompt_embeds[prompt_embeds.size(0) // 2:]
+                                image = [i[i.size(0) // 2:] for i in image]
+                                if mask is not None:
+                                    mask = mask[mask.size(0) // 2:]
+                                    masked_image_latents = masked_image_latents[masked_image_latents.size(0) // 2:]
                             # expand the latents if we are doing classifier free guidance
                             latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                             latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
@@ -522,7 +530,8 @@ def __call__(
                     return_dict=True,
                     callback=None,
                     callback_steps=1,
-                    step_preview_mode=step_preview_mode
+                    step_preview_mode=step_preview_mode,
+                    cfg_end=optimizations.cfg_end
                 )
         case Pipeline.STABILITY_SDK:
             import stability_sdk

diff --git a/generator_process/actions/depth_to_image.py b/generator_process/actions/depth_to_image.py
@@ -277,6 +277,11 @@ def __call__(
                     num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
                     with self.progress_bar(total=num_inference_steps) as progress_bar:
                         for i, t in enumerate(timesteps):
+                            # NOTE: Modified to support disabling CFG
+                            if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']:
+                                do_classifier_free_guidance = False
+                                text_embeddings = text_embeddings[text_embeddings.size(0) // 2:]
+                                depth = depth[depth.size(0) // 2:]
                             # expand the latents if we are doing classifier free guidance
                             latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
 
@@ -385,7 +390,8 @@ def __call__(
                     return_dict=True,
                     callback=None,
                     callback_steps=1,
-                    step_preview_mode=step_preview_mode
+                    step_preview_mode=step_preview_mode,
+                    cfg_end=optimizations.cfg_end
                 )
         case Pipeline.STABILITY_SDK:
             import stability_sdk

diff --git a/generator_process/actions/image_to_image.py b/generator_process/actions/image_to_image.py
@@ -105,6 +105,10 @@ def __call__(
                     num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
                     with self.progress_bar(total=num_inference_steps) as progress_bar:
                         for i, t in enumerate(timesteps):
+                            # NOTE: Modified to support disabling CFG
+                            if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']:
+                                do_classifier_free_guidance = False
+                                text_embeddings = text_embeddings[text_embeddings.size(0) // 2:]
                             # expand the latents if we are doing classifier free guidance
                             latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                             latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
@@ -131,6 +135,8 @@ def __call__(
                     # TODO: Add UI to enable this
                     # 10. Run safety checker
                     # image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype)
+
+                    image = self.image_processor.postprocess(image, output_type=output_type)
 
                     # Offload last model to CPU
                     if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
@@ -139,7 +145,7 @@ def __call__(
                     # NOTE: Modified to yield the decoded image as a numpy array.
                     yield ImageGenerationResult(
                         [np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255.
-                            for i, image in enumerate(self.numpy_to_pil(image))],
+                            for i, image in enumerate(image)],
                         [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()],
                         num_inference_steps,
                         True
@@ -199,7 +205,8 @@ def __call__(
                     return_dict=True,
                     callback=None,
                     callback_steps=1,
-                    step_preview_mode=step_preview_mode
+                    step_preview_mode=step_preview_mode,
+                    cfg_end=optimizations.cfg_end
                 )
         case Pipeline.STABILITY_SDK:
             import stability_sdk.client

diff --git a/generator_process/actions/inpaint.py b/generator_process/actions/inpaint.py
@@ -143,6 +143,12 @@ def __call__(
                     num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
                     with self.progress_bar(total=num_inference_steps) as progress_bar:
                         for i, t in enumerate(timesteps):
+                            # NOTE: Modified to support disabling CFG
+                            if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']:
+                                do_classifier_free_guidance = False
+                                text_embeddings = text_embeddings[text_embeddings.size(0) // 2:]
+                                mask = mask[mask.size(0) // 2:]
+                                masked_image_latents = masked_image_latents[masked_image_latents.size(0) // 2:]
                             # expand the latents if we are doing classifier free guidance
                             latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
 
@@ -248,7 +254,8 @@ def __call__(
                     return_dict=True,
                     callback=None,
                     callback_steps=1,
-                    step_preview_mode=step_preview_mode
+                    step_preview_mode=step_preview_mode,
+                    cfg_end=optimizations.cfg_end
                 )
         case Pipeline.STABILITY_SDK:
             import stability_sdk.client

diff --git a/generator_process/actions/prompt_to_image.py b/generator_process/actions/prompt_to_image.py
@@ -160,6 +160,7 @@ class Optimizations:
     vae_tiling: str = "off"
     vae_tile_size: int = 512
     vae_tile_blend: int = 64
+    cfg_end: float = 1.0
 
     cpu_only: bool = False
 
@@ -541,6 +542,11 @@ def __call__(
 
                     # 7. Denoising loop
                     for i, t in enumerate(self.progress_bar(timesteps)):
+                        # NOTE: Modified to support disabling CFG
+                        if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']:
+                            do_classifier_free_guidance = False
+                            text_embeddings = text_embeddings[text_embeddings.size(0) // 2:]
+
                         # expand the latents if we are doing classifier free guidance
                         latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                         latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
@@ -621,7 +627,8 @@ def __call__(
                     return_dict=True,
                     callback=None,
                     callback_steps=1,
-                    step_preview_mode=step_preview_mode
+                    step_preview_mode=step_preview_mode,
+                    cfg_end=optimizations.cfg_end
                 )
         case Pipeline.STABILITY_SDK:
             import stability_sdk.client

diff --git a/operators/dream_texture.py b/operators/dream_texture.py
@@ -10,6 +10,7 @@
 from ..generator_process import Generator
 from ..generator_process.actions.prompt_to_image import ImageGenerationResult, Pipeline
 from ..generator_process.actions.huggingface_hub import ModelType
+import time
 
 def bpy_image(name, width, height, pixels, existing_image):
     if existing_image is not None and (existing_image.size[0] != width or existing_image.size[1] != height):
@@ -89,8 +90,10 @@ def step_progress_update(self, context):
         scene.dream_textures_info = "Starting..."
 
         last_data_block = None
+        execution_start = time.time()
         def step_callback(_, step_image: ImageGenerationResult):
             nonlocal last_data_block
+            scene.dream_textures_last_execution_time = f"{time.time() - execution_start:.2f} seconds"
             if step_image.final:
                 return
             scene.dream_textures_progress = step_image.step

diff --git a/property_groups/dream_prompt.py b/property_groups/dream_prompt.py
@@ -186,6 +186,8 @@ def optimization(optim, property=None, **kwargs):
                 property = BoolProperty
             case int():
                 property = IntProperty
+            case float():
+                property = FloatProperty
             case _:
                 raise TypeError(f"{optim} cannot infer optimization property from {type(kwargs['default'])}")
     attributes[f"optimizations_{optim}"] = property(**kwargs)
@@ -221,6 +223,7 @@ def optimization(optim, property=None, **kwargs):
 ), default=0, name="VAE Tiling", description="Decodes generated images in tiled regions to reduce memory usage in exchange for longer decode time and less accurate colors.\nCan allow for generating larger images that would otherwise run out of memory on the final step")
 optimization("vae_tile_size", min=1, name="VAE Tile Size", description="Width and height measurement of tiles. Smaller sizes are more likely to cause inaccurate colors and other undesired artifacts")
 optimization("vae_tile_blend", min=0, name="VAE Tile Blend", description="Minimum amount of how much each edge of a tile will intersect its adjacent tile")
+optimization("cfg_end", name="CFG End", min=0, max=1, description="The percentage of steps to complete before disabling classifier-free guidance")
 optimization("cpu_only", name="CPU Only", description="Disables GPU acceleration and is extremely slow")
 
 def map_structure_token_items(value):

diff --git a/ui/panels/dream_texture.py b/ui/panels/dream_texture.py
@@ -308,6 +308,7 @@ def optimization(prop):
             optimization("half_precision")
             optimization("channels_last_memory_format")
             optimization("batch_size")
+            optimization("cfg_end")
     yield SpeedOptimizationPanel
 
     class MemoryOptimizationPanel(sub_panel):
@@ -378,6 +379,12 @@ def draw(self, context):
                 row.operator(CancelGenerator.bl_idname, icon="CANCEL", text="")
             row.operator(ReleaseGenerator.bl_idname, icon="X", text="")
 
+            if context.scene.dream_textures_last_execution_time != "":
+                r = layout.row()
+                r.scale_x = 0.5
+                r.scale_y = 0.5
+                r.label(text=context.scene.dream_textures_last_execution_time, icon="SORTTIME")
+
             # Validation
             try:
                 prompt.validate(context)