feat: neuron rotation #50

Draft: ljleb wants to merge 44 commits into base branch dev.

Changes from 36 commits (44 commits total):
- 18bff99 oft (ljleb, Nov 10, 2023)
- 47bdac9 device (ljleb, Nov 10, 2023)
- 0681458 rename (ljleb, Nov 10, 2023)
- 1fe0882 fix black (ljleb, Nov 10, 2023)
- 2810d89 cayley interpolation for alpha (ljleb, Nov 10, 2023)
- 09bed88 refact (ljleb, Nov 10, 2023)
- f11c054 add method to __all__ (ljleb, Nov 10, 2023)
- 1f497e9 include 1D 'rotation' (ljleb, Nov 10, 2023)
- 36fccaa ignore alpha for now (ljleb, Nov 10, 2023)
- f18208d refact (ljleb, Nov 10, 2023)
- 1dafe83 implement fractional rotations (ljleb, Nov 11, 2023)
- 149ab16 fix transform direction (ljleb, Nov 11, 2023)
- 1f71391 fix eye (ljleb, Nov 11, 2023)
- b464fd3 rewrite with out= (ljleb, Nov 11, 2023)
- e1dc59c it works; opt now (ljleb, Nov 12, 2023)
- cbb6a06 optimize: 45m -> 7m (ljleb, Nov 12, 2023)
- ce62946 rm print (ljleb, Nov 12, 2023)
- 8172927 fix precision issues (ljleb, Nov 12, 2023)
- 19fcc0a fix precision issues (ljleb, Nov 12, 2023)
- f954270 black (ljleb, Nov 12, 2023)
- e94e252 dont change (ljleb, Nov 12, 2023)
- 1f380c8 imps (ljleb, Nov 12, 2023)
- ea95b66 beta is deformation (ljleb, Nov 12, 2023)
- 1751f59 simplify (ljleb, Nov 12, 2023)
- c69bb95 @ (ljleb, Nov 12, 2023)
- 0d5160b backup (ljleb, Nov 13, 2023)
- 1920496 deal with conv attention shape, rotate centroids (ljleb, Nov 13, 2023)
- 5a1c776 black (ljleb, Nov 17, 2023)
- f61d6aa wip (ljleb, Nov 18, 2023)
- 6ac82d6 refact (ljleb, Nov 18, 2023)
- 7fff089 backup (ljleb, Nov 20, 2023)
- 6ddc503 remove approx (ljleb, Nov 21, 2023)
- a6742b3 dont edit (ljleb, Nov 21, 2023)
- d84b776 fix fp16 and fp32 merges (ljleb, Dec 7, 2023)
- d812ea8 reduced svd (ljleb, Dec 8, 2023)
- 38d4db6 black (ljleb, Dec 8, 2023)
- 3c90395 dont ellipsis (ljleb, Dec 17, 2023)
- f506831 print more info for debug (ljleb, Dec 18, 2023)
- 1b46056 dont merge sdxl kek (ljleb, Dec 18, 2023)
- aeb8c99 black (ljleb, Dec 18, 2023)
- de71102 revert utils.py (ljleb, Dec 18, 2023)
- a01e016 cache impl (ljleb, Jan 27, 2024)
- 003017e cache eigen inv (ljleb, Jan 27, 2024)
- 81515bd Update merge.py (ljleb, Jan 29, 2024)
88 changes: 86 additions & 2 deletions sd_meh/merge_methods.py
@@ -1,8 +1,9 @@
import functools
import math
from typing import Tuple

import operator
import torch
from torch import Tensor
from typing import Tuple

__all__ = [
    "weighted_sum",
@@ -17,6 +18,7 @@
    "similarity_add_difference",
    "distribution_crossover",
    "ties_add_difference",
    "rotate",
]


@@ -209,3 +211,85 @@ def filter_top_k(a: Tensor, k: float):
    k_value, _ = torch.kthvalue(torch.abs(a.flatten()).float(), k)
    top_k_filter = (torch.abs(a) >= k_value).float()
    return a * top_k_filter


def rotate(a: Tensor, b: Tensor, alpha: float, beta: float, **kwargs):
    if alpha == 0 and beta == 0:
        return a

    # scalars, spatial conv kernels and (near-)identical tensors fall back to a weighted sum
    is_conv = len(a.shape) == 4 and a.shape[-1] != 1
    if len(a.shape) == 0 or is_conv or torch.allclose(a.half(), b.half()):
        return weighted_sum(a, b, beta)

    if len(a.shape) == 4:
        shape_2d = (-1, functools.reduce(operator.mul, a.shape[1:]))
    else:
        shape_2d = (-1, a.shape[-1])

    a_neurons = a.reshape(*shape_2d).double()
    b_neurons = b.reshape(*shape_2d).double()

    # alpha interpolates the centroids along an ellipse arc
    a_centroid = a_neurons.mean(0)
    b_centroid = b_neurons.mean(0)
    new_centroid = sample_ellipsis(a_centroid, b_centroid, 2 * torch.pi * alpha)
    if len(a.shape) == 1 or len(a.shape) == 2 and a.shape[0] == 1:
        return new_centroid.reshape_as(a)

    a_neurons -= a_centroid
    b_neurons -= b_centroid

    svd_driver = "gesvd" if a.is_cuda else None
Review comment from @mariaWitch (Dec 7, 2023):

This is actually a lot more complex than meets the eye. We should be determining the SVD driver based on the size of the matrix: different drivers perform faster on smaller or bigger matrices, and in some instances the CPU will outperform the GPU. What exactly is our average matrix size when we call svd?
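A minimal sketch (editor's illustration, not code from this PR) of the kind of size-based driver selection suggested here; the helper name and the cutoff value are assumptions, not benchmarked results:

```python
from typing import Optional

import torch


def pick_svd_driver(m: torch.Tensor, jacobi_max_dim: int = 1024) -> Optional[str]:
    # torch.linalg.svd only honors `driver=` for CUDA inputs, so CPU inputs
    # must stay on the default driver.
    if not m.is_cuda:
        return None
    n = min(m.shape[-2:])
    # Jacobi (gesvdj) tends to win on small/medium matrices; gesvd is the
    # robust choice for large ones. The 1024 cutoff is a placeholder to be
    # replaced by actual benchmarks.
    return "gesvdj" if n <= jacobi_max_dim else "gesvd"


# usage: u, s, v_t = torch.linalg.svd(m, full_matrices=False, driver=pick_svd_driver(m))
```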

Reply from @ljleb (Collaborator, Author) (Dec 7, 2023):

If we include all keys, it goes from $320^2$ to ~$20K^2$. As this upper bound isn't really practical, if we exclude all conv layers (which have the largest neurons), the upper bound is ~$5K^2$. I can list all sizes in a bit; they are all square matrices.

I've never done this before at all, this is all new to me. Appreciate the help. IIUC, this only matters on CUDA devices?

Reply from @ljleb (Collaborator, Author) (Dec 7, 2023):

All matrix sizes that currently go through SVD are listed below:

  • 320x320: 47 keys
  • 640x640: 48 keys
  • 768x768: 94 keys
  • 960x960: 2 keys
  • 1280x1280: 83 keys
  • 2560x2560: 10 keys
  • 3072x3072: 12 keys
  • 5120x5120: 6 keys

Follow-up review comment:

I did some benchmarking between JAX's SVD functions jitted through XLA and PyTorch's different drivers on a Colab using a V100 (a 3080 is about equal to this in PyTorch performance), and these were the results.
[benchmark results image not captured in this excerpt]
Basically, unless you need full accuracy, even with full_matrices set to True, gesvdj is going to be faster. However, the speed you gain comes at the cost of some accuracy, and gesvdj may occasionally fail to converge, requiring a fallback to gesvd.
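A minimal sketch (editor's illustration, not from this PR) of the try-fast-then-fall-back pattern described above; the convergence check used here is an assumed stand-in, not the criterion PyTorch itself reports:

```python
import torch


def svd_with_fallback(m: torch.Tensor):
    # Sketch only: try the faster Jacobi driver first on CUDA, and redo the
    # decomposition with the robust gesvd driver if the result looks degenerate.
    if not m.is_cuda:
        return torch.linalg.svd(m, full_matrices=False)
    u, s, v_t = torch.linalg.svd(m, full_matrices=False, driver="gesvdj")
    if not torch.isfinite(s).all():
        # assumed convergence check; gesvdj can fail on hard inputs
        u, s, v_t = torch.linalg.svd(m, full_matrices=False, driver="gesvd")
    return u, s, v_t
```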


Reply from @ljleb (Collaborator, Author):

By the way, full_matrices=False doesn't produce a reduced SVD when $m = n$ ($m$ and $n$ being the width and height of the SVD input). That's why it didn't seem to affect generation speed. We might want to remove it, as it doesn't really change anything: the input to the SVD is always a square covariance matrix here.
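For illustration (not part of the PR), a quick check of that claim: for a square input, the reduced and full decompositions have identical shapes.

```python
import torch

m = torch.randn(320, 320, dtype=torch.float64)
u_full, s_full, vt_full = torch.linalg.svd(m, full_matrices=True)
u_red, s_red, vt_red = torch.linalg.svd(m, full_matrices=False)

# For a square matrix the "reduced" SVD has the same shapes as the full one.
assert u_full.shape == u_red.shape == (320, 320)
assert vt_full.shape == vt_red.shape == (320, 320)
```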

Follow-up review comment:

So I did complete a full merge on CUDA and didn't receive the error. I think it has something to do with moving models between the CPU and GPU while the WebUI keeps models loaded in memory. Is there a sanity check when the models are loaded to ensure that they have been moved to the CPU if work_device is set to CPU?

Reply from @ljleb (Collaborator, Author) (Dec 12, 2023):

Before merging, when assembling the merge args, the weights are sent to the requested device:

meh/sd_meh/merge.py, lines 465 to 466 in 2780321:

    "a": thetas["model_a"][key].to(work_device),
    "b": thetas["model_b"][key].to(work_device),

Note that if work_device is None, it takes the value of device:

meh/sd_meh/merge.py, lines 371 to 372 in 2780321:

    if work_device is None:
        work_device = device

So IIUC, it shouldn't be a device issue.
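To illustrate the sanity check being asked about, a hypothetical helper (not something sd_meh is shown to provide in this thread) that verifies a state dict is on the expected work device:

```python
import torch


def assert_thetas_on_device(thetas: dict, expected: str) -> None:
    # Hypothetical check: confirm every tensor in a state dict sits on the
    # expected work device before merge methods run.
    expected_type = torch.device(expected).type
    for key, tensor in thetas.items():
        if tensor.device.type != expected_type:
            raise RuntimeError(
                f"{key} is on {tensor.device}, expected a {expected_type} device"
            )
```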

Reply from @ljleb (Collaborator, Author) (Dec 18, 2023):

I think I found the culprit.

It seems that on CPU there sometimes isn't enough precision, which leads to $U$ or $V^T$ having a determinant of 0. This is not what SVD should output: $U$ and $V^T$ should always be orthogonal transforms, which implies $|\det U| = |\det V^T| = 1$.

When the determinant of $U$ or $V^T$ is 0, this line divides by 0:

        u[:, -1] /= torch.det(u) * torch.det(v_t)

So the last column of u is sometimes filled with infinities. Then, when computing the eigenvalues of the matrix, an error is raised.

Reply from @ljleb (Collaborator, Author):


As noted below, while this prevents the entire merge from raising an error, rotations with invalid determinants still result in a broken merge. I went the other direction and raised an error instead.
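A minimal sketch (editor's illustration; the helper name and tolerance are assumptions) of raising on an invalid determinant instead of letting infinities propagate into the eigendecomposition:

```python
import torch


def check_svd_orthogonality(u: torch.Tensor, v_t: torch.Tensor, atol: float = 1e-6) -> None:
    # Orthogonal factors must satisfy |det| == 1; a determinant near 0 signals
    # a degenerate SVD result (e.g. from insufficient precision on CPU).
    for name, factor in (("U", u), ("V^T", v_t)):
        det = torch.det(factor)
        if torch.abs(det.abs() - 1) > atol:
            raise ValueError(
                f"SVD produced a non-orthogonal {name} (det={det.item():.3e}); "
                "consider float64 inputs or the gesvd driver"
            )
```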

    u, _, v_t = torch.linalg.svd(
        a_neurons.T @ b_neurons, full_matrices=False, driver=svd_driver
    )

    alpha_is_float = alpha != round(alpha)
    if alpha_is_float:
        # cancel reflection. without this, eigenvalues often have a complex component
        # and then we can't obtain a valid dtype for the merge
        u[:, -1] /= torch.det(u) * torch.det(v_t)

    transform = rotation = u @ v_t
    print("shape:", transform.shape)
    det = torch.det(transform)
    if torch.abs(det.abs() - 1) > 1e-6:
        print("determinant error:", det)

    if alpha_is_float:
        transform = fractional_matrix_power(transform, alpha)
    elif alpha == 0:
        transform = torch.eye(
            len(transform),
            dtype=transform.dtype,
            device=transform.device,
        )
    elif alpha != 1:
        transform = torch.linalg.matrix_power(transform, round(alpha))

    if beta != 0:
        # interpolate the relationship between the neurons
        a_neurons = weighted_sum(a_neurons, b_neurons @ rotation.T, beta)

    a_neurons @= transform
    a_neurons += new_centroid
    return a_neurons.reshape_as(a).to(a.dtype)


def fractional_matrix_power(matrix: Tensor, power: float):
    eigenvalues, eigenvectors = torch.linalg.eig(matrix)
    eigenvalues.pow_(power)
    result = eigenvectors @ torch.diag(eigenvalues) @ torch.linalg.inv(eigenvectors)
    if ((error := result.imag) > 1e-4).any():
        print("image error:", error)
    return result.real.to(dtype=matrix.dtype)


def sample_ellipsis(a, b, t):
    return torch.column_stack((a, b)) @ torch.tensor(
        [
            math.sin(t),
            math.cos(t),
        ],
        dtype=a.dtype,
        device=a.device,
    )
6 changes: 2 additions & 4 deletions sd_meh/rebasin.py
@@ -2200,11 +2200,9 @@ def apply_permutation(ps: PermutationSpec, perm, params):
 def update_model_a(ps: PermutationSpec, perm, model_a, new_alpha):
     for k in model_a:
         try:
-            perm_params = get_permuted_param(
-                ps, perm, k, model_a
-            )
+            perm_params = get_permuted_param(ps, perm, k, model_a)
             model_a[k] = model_a[k] * (1 - new_alpha) + new_alpha * perm_params
-        except RuntimeError: # dealing with pix2pix and inpainting models
+        except RuntimeError:  # dealing with pix2pix and inpainting models
             continue
     return model_a
