From 9214f4a3782a74e510eff7e09b59457fe8b63511 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Mon, 28 Oct 2024 05:59:23 +0100
Subject: [PATCH] remove attention mask for self-attention

---
 src/diffusers/pipelines/allegro/pipeline_allegro.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/src/diffusers/pipelines/allegro/pipeline_allegro.py b/src/diffusers/pipelines/allegro/pipeline_allegro.py
index afcc2a568670..a8042e75cd14 100644
--- a/src/diffusers/pipelines/allegro/pipeline_allegro.py
+++ b/src/diffusers/pipelines/allegro/pipeline_allegro.py
@@ -843,6 +843,8 @@ def __call__(
         if do_classifier_free_guidance:
             prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
             prompt_attention_mask = torch.cat([negative_prompt_attention_mask, prompt_attention_mask], dim=0)
+        if prompt_embeds.ndim == 3:
+            prompt_embeds = prompt_embeds.unsqueeze(1)  # b l d -> b 1 l d
 
         # 4. Prepare timesteps
         timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
@@ -884,17 +886,9 @@ def __call__(
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latent_model_input.shape[0])
 
-                if prompt_embeds.ndim == 3:
-                    prompt_embeds = prompt_embeds.unsqueeze(1)  # b l d -> b 1 l d
-
-                # prepare attention_mask.
-                # b c t h w -> b t h w
-                attention_mask = torch.ones_like(latent_model_input)[:, 0]
-
                 # predict noise model_output
                 noise_pred = self.transformer(
-                    latent_model_input,
-                    attention_mask=attention_mask,
+                    hidden_states=latent_model_input,
                     encoder_hidden_states=prompt_embeds,
                     encoder_attention_mask=prompt_attention_mask,
                     timestep=timestep,
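
For context, a minimal sketch of why the two changes in this patch are safe. It uses hypothetical tensor shapes, not the actual Allegro transformer: a self-attention mask built from torch.ones_like is all ones and therefore masks nothing, and the "b l d -> b 1 l d" unsqueeze of prompt_embeds only needs to run once, before the denoising loop, rather than on every step.

import torch

# Hypothetical shapes standing in for the Allegro video latents and text embeddings.
batch, channels, frames, height, width = 2, 4, 8, 16, 16
seq_len, dim = 77, 1024

latent_model_input = torch.randn(batch, channels, frames, height, width)
prompt_embeds = torch.randn(batch, seq_len, dim)  # b l d

# The removed code built a mask of ones from the latents (b c t h w -> b t h w).
# A mask that is all ones keeps every position, so dropping it leaves the
# self-attention scores unchanged.
attention_mask = torch.ones_like(latent_model_input)[:, 0]
assert bool(attention_mask.all())  # every position kept -> the mask is redundant

# The unsqueeze is guarded by the ndim check, so hoisting it out of the
# per-step loop reshapes once instead of re-checking on every iteration.
if prompt_embeds.ndim == 3:
    prompt_embeds = prompt_embeds.unsqueeze(1)  # b l d -> b 1 l d
assert prompt_embeds.shape == (batch, 1, seq_len, dim)

With the redundant mask gone, the transformer call in the loop passes the latents via the hidden_states keyword and no self-attention attention_mask, as shown in the second hunk above.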