-
Notifications
You must be signed in to change notification settings - Fork 89
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
150 - Diff-scm #306
150 - Diff-scm #306
Changes from 13 commits
6a3efe0
4d9c30d
a750e53
cc7b704
2a0bed9
20535f0
04dff72
ff75429
f496f70
b8a2a48
c25950e
29c0699
f50f545
472b1f7
5940377
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1889,4 +1889,4 @@ def forward( | |
# 7. output block | ||
h = self.out(h) | ||
|
||
return h | ||
return h | ||
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -225,7 +225,83 @@ def step( | |||||||||
|
||||||||||
return pred_prev_sample, pred_original_sample | ||||||||||
|
||||||||||
def add_noise(self, original_samples: torch.Tensor, noise: torch.Tensor, timesteps: torch.Tensor) -> torch.Tensor: | ||||||||||
def reversed_step( | ||||||||||
self, | ||||||||||
model_output: torch.Tensor, | ||||||||||
timestep: int, | ||||||||||
sample: torch.Tensor, | ||||||||||
eta: float = 0.0, | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove eta since it is not used in reversed_step |
||||||||||
) -> tuple[torch.Tensor, torch.Tensor]: | ||||||||||
""" | ||||||||||
Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion | ||||||||||
process from the learned model outputs (most often the predicted noise). | ||||||||||
|
||||||||||
Args: | ||||||||||
model_output: direct output from learned diffusion model. | ||||||||||
timestep: current discrete timestep in the diffusion chain. | ||||||||||
sample: current instance of sample being created by diffusion process. | ||||||||||
eta: weight of noise for added noise in diffusion step. | ||||||||||
predict_epsilon: flag to use when model predicts the samples directly instead of the noise, epsilon. | ||||||||||
generator: random number generator. | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Arguments not declared |
||||||||||
|
||||||||||
Returns: | ||||||||||
pred_prev_sample: Predicted previous sample | ||||||||||
pred_original_sample: Predicted original sample | ||||||||||
""" | ||||||||||
# See Appendix F at https://arxiv.org/pdf/2105.05233.pdf, or Equation (6) in https://arxiv.org/pdf/2203.04306.pdf | ||||||||||
|
||||||||||
# Notation (<variable name> -> <name in paper> | ||||||||||
# - model_output -> e_theta(x_t, t) | ||||||||||
# - pred_original_sample -> f_theta(x_t, t) or x_0 | ||||||||||
# - std_dev_t -> sigma_t | ||||||||||
# - eta -> η | ||||||||||
# - pred_sample_direction -> "direction pointing to x_t" | ||||||||||
# - pred_post_sample -> "x_t+1" | ||||||||||
|
||||||||||
assert eta == 0, "eta must be 0 for reversed_step" | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove eta since it is not used in the reversed_step |
||||||||||
|
||||||||||
# 1. get previous step value (=t-1) | ||||||||||
prev_timestep = timestep + self.num_train_timesteps // self.num_inference_steps # t+1 | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
|
||||||||||
# 2. compute alphas, betas | ||||||||||
alpha_prod_t = self.alphas_cumprod[timestep] | ||||||||||
alpha_prod_t_prev = ( | ||||||||||
self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod | ||||||||||
) # alpha at timestep t+1 | ||||||||||
|
||||||||||
beta_prod_t = 1 - alpha_prod_t | ||||||||||
|
||||||||||
# 3. compute predicted original sample from predicted noise also called | ||||||||||
# "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf | ||||||||||
if self.prediction_type == "epsilon": | ||||||||||
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) | ||||||||||
elif self.prediction_type == "sample": | ||||||||||
pred_original_sample = model_output | ||||||||||
elif self.prediction_type == "v_prediction": | ||||||||||
pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output | ||||||||||
# predict V | ||||||||||
model_output = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adopt new variable names used for DDIM.step (with |
||||||||||
|
||||||||||
# 4. Clip "predicted x_0" | ||||||||||
if self.clip_sample: | ||||||||||
pred_original_sample = torch.clamp(pred_original_sample, -1, 1) | ||||||||||
|
||||||||||
|
||||||||||
# 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fix numbering "5. " |
||||||||||
pred_sample_direction = (1 - alpha_prod_t_prev) ** (0.5) * model_output | ||||||||||
|
||||||||||
# 7. compute x_t+1 without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fix numbering "6. " |
||||||||||
pred_post_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction | ||||||||||
|
||||||||||
return pred_post_sample, pred_original_sample | ||||||||||
|
||||||||||
def add_noise( | ||||||||||
self, | ||||||||||
original_samples: torch.Tensor, | ||||||||||
noise: torch.Tensor, | ||||||||||
timesteps: torch.Tensor, | ||||||||||
) -> torch.Tensor: | ||||||||||
|
||||||||||
""" | ||||||||||
Add noise to the original samples. | ||||||||||
|
||||||||||
|
@@ -270,4 +346,4 @@ def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: tor | |||||||||
sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) | ||||||||||
|
||||||||||
velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample | ||||||||||
return velocity | ||||||||||
return velocity |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please run
./runtests.sh --autofix
to fix the formatting issues.