Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimization Fixes and Improvements #575

Merged
merged 14 commits into from
Apr 7, 2023
7 changes: 5 additions & 2 deletions generator_process/actions/depth_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,10 @@ def __call__(
# 12. Run safety checker
# image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype)

# Offload last model to CPU
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
self.final_offload_hook.offload()

# NOTE: Modified to yield the decoded image as a numpy array.
yield ImageGenerationResult(
[np.asarray(PIL.ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255.
Expand Down Expand Up @@ -362,8 +366,7 @@ def __call__(
_configure_model_padding(pipe.vae, seamless_axes)

# Inference
with (torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext()), \
(torch.autocast(device) if optimizations.can_use("amp", device) else nullcontext()):
with torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext():
yield from pipe(
prompt=prompt,
depth_image=depth_image,
Expand Down
12 changes: 12 additions & 0 deletions generator_process/actions/detect_seamless/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,18 @@ def __eq__(self, other):
return True
return False

def __and__(self, other):
    """Intersection of two axis sets: an axis is set only when both operands have it."""
    x_common = self.x and other.x
    y_common = self.y and other.y
    return SeamlessAxes((x_common, y_common))

def __or__(self, other):
    """Union of two axis sets: an axis is set when either operand has it."""
    x_either = self.x or other.x
    y_either = self.y or other.y
    return SeamlessAxes((x_either, y_either))

def __xor__(self, other):
    """Symmetric difference: an axis is set when exactly one operand has it."""
    x_diff = self.x != other.x
    y_diff = self.y != other.y
    return SeamlessAxes((x_diff, y_diff))

def __invert__(self):
    """Complement: flip both axis flags."""
    flipped = (not self.x, not self.y)
    return SeamlessAxes(flipped)

@classmethod
def _missing_(cls, value):
if isinstance(value, str):
Expand Down
39 changes: 21 additions & 18 deletions generator_process/actions/image_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ def __call__(
# 10. Run safety checker
# image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype)

# Offload last model to CPU
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
self.final_offload_hook.offload()

# NOTE: Modified to yield the decoded image as a numpy array.
yield ImageGenerationResult(
[np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255.
Expand Down Expand Up @@ -180,24 +184,23 @@ def __call__(
_configure_model_padding(pipe.vae, seamless_axes)

# Inference
with (torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext()), \
(torch.autocast(device) if optimizations.can_use("amp", device) else nullcontext()):
yield from pipe(
prompt=prompt,
image=[init_image] * batch_size,
strength=strength,
num_inference_steps=steps,
guidance_scale=cfg_scale,
negative_prompt=negative_prompt if use_negative_prompt else None,
num_images_per_prompt=1,
eta=0.0,
generator=generator,
output_type="pil",
return_dict=True,
callback=None,
callback_steps=1,
step_preview_mode=step_preview_mode
)
with torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext():
yield from pipe(
prompt=prompt,
image=[init_image] * batch_size,
strength=strength,
num_inference_steps=steps,
guidance_scale=cfg_scale,
negative_prompt=negative_prompt if use_negative_prompt else None,
num_images_per_prompt=1,
eta=0.0,
generator=generator,
output_type="pil",
return_dict=True,
callback=None,
callback_steps=1,
step_preview_mode=step_preview_mode
)
case Pipeline.STABILITY_SDK:
import stability_sdk.client
import stability_sdk.interfaces.gooseai.generation.generation_pb2
Expand Down
71 changes: 37 additions & 34 deletions generator_process/actions/inpaint.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,10 @@ def __call__(
# 10. Run safety checker
# image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype)

# Offload last model to CPU
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
self.final_offload_hook.offload()

# NOTE: Modified to yield the decoded image as a numpy array.
yield ImageGenerationResult(
[np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255.
Expand Down Expand Up @@ -213,40 +217,39 @@ def __call__(
_configure_model_padding(pipe.vae, seamless_axes)

# Inference
with (torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext()), \
(torch.autocast(device) if optimizations.can_use("amp", device) else nullcontext()):
match inpaint_mask_src:
case 'alpha':
mask_image = ImageOps.invert(init_image.getchannel('A'))
case 'prompt':
from transformers import AutoProcessor, CLIPSegForImageSegmentation

processor = AutoProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
clipseg = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")
inputs = processor(text=[text_mask], images=[init_image.convert('RGB')], return_tensors="pt", padding=True)
outputs = clipseg(**inputs)
mask_image = Image.fromarray(np.uint8((1 - torch.sigmoid(outputs.logits).lt(text_mask_confidence).int().detach().numpy()) * 255), 'L').resize(init_image.size)

yield from pipe(
prompt=prompt,
image=[init_image.convert('RGB')] * batch_size,
mask_image=[mask_image] * batch_size,
strength=strength,
height=init_image.size[1] if fit else height,
width=init_image.size[0] if fit else width,
num_inference_steps=steps,
guidance_scale=cfg_scale,
negative_prompt=negative_prompt if use_negative_prompt else None,
num_images_per_prompt=1,
eta=0.0,
generator=generator,
latents=None,
output_type="pil",
return_dict=True,
callback=None,
callback_steps=1,
step_preview_mode=step_preview_mode
)
with torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext():
match inpaint_mask_src:
case 'alpha':
mask_image = ImageOps.invert(init_image.getchannel('A'))
case 'prompt':
from transformers import AutoProcessor, CLIPSegForImageSegmentation

processor = AutoProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
clipseg = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")
inputs = processor(text=[text_mask], images=[init_image.convert('RGB')], return_tensors="pt", padding=True)
outputs = clipseg(**inputs)
mask_image = Image.fromarray(np.uint8((1 - torch.sigmoid(outputs.logits).lt(text_mask_confidence).int().detach().numpy()) * 255), 'L').resize(init_image.size)

yield from pipe(
prompt=prompt,
image=[init_image.convert('RGB')] * batch_size,
mask_image=[mask_image] * batch_size,
strength=strength,
height=init_image.size[1] if fit else height,
width=init_image.size[0] if fit else width,
num_inference_steps=steps,
guidance_scale=cfg_scale,
negative_prompt=negative_prompt if use_negative_prompt else None,
num_images_per_prompt=1,
eta=0.0,
generator=generator,
latents=None,
output_type="pil",
return_dict=True,
callback=None,
callback_steps=1,
step_preview_mode=step_preview_mode
)
case Pipeline.STABILITY_SDK:
import stability_sdk.client
import stability_sdk.interfaces.gooseai.generation.generation_pb2
Expand Down
Loading