ott-jax · michalk8 · Sep 1, 2022 · Jul 19, 2022 · Jul 19, 2022 · Jul 20, 2022
diff --git a/docs/notebooks/LRSinkhorn.ipynb b/docs/notebooks/LRSinkhorn.ipynb
@@ -22,7 +22,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 5,
    "metadata": {
     "id": "q9wY2bCeUIB0"
    },
@@ -37,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 11,
    "metadata": {
     "id": "PfiRNdhVW8hT"
    },
@@ -67,16 +67,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 12,
    "metadata": {
     "id": "pN_f36ACALET"
    },
    "outputs": [
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "WARNING:absl:No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)\n"
+     "ename": "AttributeError",
+     "evalue": "module 'ott' has no attribute 'geometry'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-12-c9395a034c52>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_points\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrng\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0md\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mgeom\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mott\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgeometry\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpointcloud\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPointCloud\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepsilon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m \u001b[0mot_prob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mott\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear_problems\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLinearProblem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgeom\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mAttributeError\u001b[0m: module 'ott' has no attribute 'geometry'"
      ]
     }
    ],
@@ -367,7 +371,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12"
+   "version": "3.8.2"
   }
  },
  "nbformat": 4,

diff --git a/ott/core/fixed_point_loop.py b/ott/core/fixed_point_loop.py
@@ -15,7 +15,7 @@
 # Lint as: python3
 """jheek@ backprop-friendly implementation of fixed point loop."""
 from typing import Any, Callable
-
+ 
 import jax
 import numpy as np
 from jax import numpy as jnp

diff --git a/ott/core/linear_problems.py b/ott/core/linear_problems.py
@@ -15,7 +15,7 @@
 
 from typing import Callable, Optional, Tuple
 
-import jax
+import jax 
 import jax.numpy as jnp
 
 from ott.geometry import geometry

diff --git a/ott/core/sinkhorn.py b/ott/core/sinkhorn.py
@@ -15,7 +15,7 @@
 # Lint as: python3
 """A Jax implementation of the Sinkhorn algorithm."""
 from typing import Any, Callable, NamedTuple, Optional, Sequence, Tuple
-
+ 
 import jax
 import jax.numpy as jnp
 import numpy as np

diff --git a/ott/core/sinkhorn_lr.py b/ott/core/sinkhorn_lr.py
@@ -21,7 +21,8 @@
 from typing_extensions import Literal
 
 from ott.core import fixed_point_loop, linear_problems, sinkhorn
-from ott.geometry import geometry
+from ott.geometry import geometry, pointcloud
+from ott.tools import k_means
 
 
 class LRSinkhornState(NamedTuple):
@@ -30,12 +31,24 @@ class LRSinkhornState(NamedTuple):
   q: Optional[jnp.ndarray] = None
   r: Optional[jnp.ndarray] = None
   g: Optional[jnp.ndarray] = None
+  q_prev: Optional[jnp.ndarray] = None
+  r_prev: Optional[jnp.ndarray] = None
+  g_prev: Optional[jnp.ndarray] = None
+  gamma: Optional[float] = None
   costs: Optional[jnp.ndarray] = None
+  criterion: Optional[float] = None
+  count_escape: Optional[int] = None
 
   def set(self, **kwargs: Any) -> 'LRSinkhornState':
     """Return a copy of self, with potential overwrites."""
     return self._replace(**kwargs)
 
+  def compute_crit(self) -> float:
+    """Criterion between the current and the previous (q, r, g) iterates."""
+    current = (self.q, self.r, self.g)
+    previous = (self.q_prev, self.r_prev, self.g_prev)
+    return compute_criterion(*current, *previous, self.gamma)
+
   def reg_ot_cost(
       self,
       ot_prob: linear_problems.LinearProblem,
@@ -101,6 +114,22 @@ def solution_error(
   return err
 
 
+def compute_criterion(
+    q: jnp.ndarray, r: jnp.ndarray, g: jnp.ndarray, q_prev: jnp.ndarray,
+    r_prev: jnp.ndarray, g_prev: jnp.ndarray, gamma: float
+):
+  """Sum of gamma**-2 scaled symmetrized KLs between new and old q, r, g."""
+  scale = (1 / gamma) ** 2
+  pairs = ((q, q_prev), (r, r_prev), (g, g_prev))
+  terms = [scale * (kl(new, old) + kl(old, new)) for new, old in pairs]
+  return terms[0] + terms[1] + terms[2]
+
+
+def kl(q1, q2):
+  """Return sum(q1 * (log(q1) - log(q2))) over all entries."""
+  return jnp.sum(q1 * (jnp.log(q1) - jnp.log(q2)))
+
+
 class LRSinkhornOutput(NamedTuple):
   """Implement the problems.Transport interface, for a LR Sinkhorn solution."""
 
@@ -209,6 +238,7 @@ class LRSinkhorn(sinkhorn.Sinkhorn):
   Args:
     rank: the rank constraint on the coupling to minimize the linear OT problem
     gamma: the (inverse of) gradient stepsize used by mirror descent.
+    gamma_init: 'rescale' divides gamma by the largest squared max-abs gradient.
     epsilon: entropic regularization added on top of low-rank problem.
     init_type: TODO.
     lse_mode: whether to run computations in lse or kernel mode. At this moment,
@@ -232,9 +262,10 @@ class LRSinkhorn(sinkhorn.Sinkhorn):
   def __init__(
       self,
       rank: int = 10,
-      gamma: float = 1.0,
-      epsilon: float = 1e-4,
-      init_type: Literal['random', 'rank_2'] = 'random',
+      gamma: float = 10.0,
+      gamma_init: Literal['rescale', 'not_recale'] = 'rescale',
+      epsilon: float = 0.0,
+      init_type: Literal['random', 'rank_2', 'kmeans'] = 'kmeans',
       lse_mode: bool = True,
       threshold: float = 1e-3,
       norm_error: int = 1,
@@ -250,6 +281,7 @@ def __init__(
     # TODO(michalk8): this should call super
     self.rank = rank
     self.gamma = gamma
+    self.gamma_init = gamma_init
     self.epsilon = epsilon
     self.init_type = init_type
     self.lse_mode = lse_mode
@@ -275,7 +307,7 @@ def __call__(
     """Main interface to run LR sinkhorn."""  # noqa: D401
     init_q, init_r, init_g = (init if init is not None else (None, None, None))
     # Random initialization for q, r, g using rng_key
-    rng = jax.random.split(jax.random.PRNGKey(self.rng_key), 3)
+    rng = jax.random.split(jax.random.PRNGKey(self.rng_key), 5)
     a, b = ot_prob.a, ot_prob.b
     if self.init_type == 'random':
       if init_g is None:
@@ -306,6 +338,49 @@ def __call__(
       if init_r is None:
         init_r = lambda_1 * jnp.dot(b1[:, None], g1.reshape(1, -1))
         init_r += (1 - lambda_1) * jnp.dot(b2[:, None], g2.reshape(1, -1))
+    elif self.init_type == 'kmeans':
+      x = ot_prob.geom.x
+      y = ot_prob.geom.y
+      if init_g is None:
+        init_g = jnp.ones((self.rank,)) / self.rank
+      if init_q is None:
+        kmeans_x = jax.jit(
+            k_means.kmeans, static_argnums=(2, 3, 4, 5)
+        ) if self.jit else k_means.kmeans
+        kmeans_x = kmeans_x(rng[3], x, self.rank)
+        z_x = kmeans_x[0]
+        geom_x = pointcloud.PointCloud(
+            x, z_x, epsilon=0.1, scale_cost='max_cost'
+        )
+        ot_prob_x = linear_problems.LinearProblem(geom_x, a, init_g)
+        solver_x = sinkhorn.Sinkhorn(
+            norm_error=self.norm_error,
+            lse_mode=self.lse_mode,
+            jit=self.jit,
+            implicit_diff=self.implicit_diff,
+            use_danskin=self.use_danskin
+        )
+        ot_sink_x = solver_x(ot_prob_x)
+        init_q = ot_sink_x.matrix
+      if init_r is None:
+        kmeans_y = jax.jit(
+            k_means.kmeans, static_argnums=(2, 3, 4, 5)
+        ) if self.jit else k_means.kmeans
+        kmeans_y = kmeans_y(rng[4], y, self.rank)
+        z_y = kmeans_y[0]
+        geom_y = pointcloud.PointCloud(
+            y, z_y, epsilon=0.1, scale_cost='max_cost'
+        )
+        ot_prob_y = linear_problems.LinearProblem(geom_y, b, init_g)
+        solver_y = sinkhorn.Sinkhorn(
+            norm_error=self.norm_error,
+            lse_mode=self.lse_mode,
+            jit=self.jit,
+            implicit_diff=self.implicit_diff,
+            use_danskin=self.use_danskin
+        )
+        ot_sink_y = solver_y(ot_prob_y)
+        init_r = ot_sink_y.matrix
     else:
       raise NotImplementedError(self.init_type)
     run_fn = jax.jit(run) if self.jit else run
@@ -316,13 +391,26 @@ def norm_error(self) -> Tuple[int]:
     return (self._norm_error,)
 
   def _converged(self, state: LRSinkhornState, iteration: int) -> bool:
-    costs, i, tol = state.costs, iteration, self.threshold
-    return jnp.logical_and(
-        i >= 2, jnp.isclose(costs[i - 2], costs[i - 1], rtol=tol)
-    )
+    """Return whether `state.criterion` has dropped below the threshold.
+
+    Built from `jnp.where` rather than Python `if`, so the predicate also
+    works on traced values (e.g. under `jax.jit`). Never true before
+    iteration 2, nor while `state.count_escape + 1 == iteration`.
+    """
+    criterion, tol = state.criterion, self.threshold
+    # `state` is an immutable NamedTuple: `count_escape` is only read here.
+    stuck = state.count_escape + 1 == iteration
+    err = jnp.where(
+        criterion > tol / 1e-1, criterion,
+        jnp.where(stuck, jnp.inf, criterion)
+    )
+    return jnp.logical_and(iteration >= 2, err < tol)
 
   def _diverged(self, state: LRSinkhornState, iteration: int) -> bool:
-    return jnp.logical_not(jnp.isfinite(state.costs[iteration - 1]))
+    # Diverged if the criterion or the cost at `iteration - 1` is non-finite.
+    crit_ok = jnp.isfinite(state.criterion)
+    cost_ok = jnp.isfinite(state.costs[iteration - 1])
+    return jnp.logical_or(jnp.logical_not(crit_ok), jnp.logical_not(cost_ok))
 
   def _continue(self, state: LRSinkhornState, iteration: int) -> bool:
     """Continue while not(converged) and not(diverged)."""
@@ -338,15 +426,34 @@ def lr_costs(
       self, ot_prob: linear_problems.LinearProblem, state: LRSinkhornState,
       iteration: int
   ) -> Tuple[jnp.ndarray, jnp.ndarray, jnp.ndarray]:
-    c_q = ot_prob.geom.apply_cost(state.r, axis=1) / state.g[None, :]
-    c_q += (self.epsilon - 1 / self.gamma) * jnp.log(state.q)
-    c_r = ot_prob.geom.apply_cost(state.q) / state.g[None, :]
-    c_r += (self.epsilon - 1 / self.gamma) * jnp.log(state.r)
+    grad_q = ot_prob.geom.apply_cost(state.r, axis=1) / state.g[None, :]
+    grad_q = jnp.where(
+        self.epsilon != 0., grad_q + self.epsilon * jnp.log(state.q), grad_q
+    )
+    if self.gamma_init == "rescale":
+      norm_q = jnp.max(jnp.abs(grad_q)) ** 2
+    grad_r = ot_prob.geom.apply_cost(state.q) / state.g[None, :]
+    grad_r = jnp.where(
+        self.epsilon != 0., grad_r + self.epsilon * jnp.log(state.r), grad_r
+    )
+    if self.gamma_init == "rescale":
+      norm_r = jnp.max(jnp.abs(grad_r)) ** 2
     diag_qcr = jnp.sum(
         state.q * ot_prob.geom.apply_cost(state.r, axis=1), axis=0
     )
+    grad_g = -diag_qcr / state.g ** 2
+    grad_g = jnp.where(
+        self.epsilon != 0., grad_g + self.epsilon * jnp.log(state.g), grad_g
+    )
+    if self.gamma_init == "rescale":
+      norm_g = jnp.max(jnp.abs(grad_g)) ** 2
+    if self.gamma_init == "rescale":
+      self.gamma = self.gamma / max(norm_q, norm_r, norm_g)
     h = diag_qcr / state.g ** 2 - (self.epsilon -
                                    1 / self.gamma) * jnp.log(state.g)
+    c_q = grad_q - (1 / self.gamma) * jnp.log(state.q)
+    c_r = grad_r - (1 / self.gamma) * jnp.log(state.r)
+    h = -grad_g + (1 / self.gamma) * jnp.log(state.g)
     return c_q, c_r, h
 
   def dysktra_update(
@@ -358,10 +465,10 @@ def dysktra_update(
       state: LRSinkhornState,
       iteration: int,
       min_entry_value: float = 1e-6,
-      tolerance: float = 1e-4,
+      tolerance: float = 1e-3,
       min_iter: int = 0,
       inner_iter: int = 10,
-      max_iter: int = 200
+      max_iter: int = 10000
   ) -> Tuple[jnp.ndarray, jnp.ndarray, jnp.ndarray]:
     # shortcuts for problem's definition.
     r = self.rank
@@ -458,11 +565,15 @@ def lse_step(
       iteration: int
   ) -> LRSinkhornState:
     """LR Sinkhorn LSE update."""
+    q_prev, r_prev, g_prev = state.q, state.r, state.g
     c_q, c_r, h = self.lr_costs(ot_prob, state, iteration)
+    gamma = self.gamma
     q, r, g = self.dysktra_update(
         c_q, c_r, h, ot_prob, state, iteration, **self.kwargs_dys
     )
-    return state.set(q=q, g=g, r=r)
+    return state.set(
+        q=q, g=g, r=r, q_prev=q_prev, g_prev=g_prev, r_prev=r_prev, gamma=gamma
+    )
 
   def kernel_step(
       self, ot_prob: linear_problems.LinearProblem, state: LRSinkhornState,
@@ -496,22 +607,39 @@ def one_iteration(
     else:
       state = self.kernel_step(ot_prob, state, iteration)
 
+    # compute the criterion
+    criterion = state.compute_crit()
+
     # re-computes error if compute_error is True, else set it to inf.
     cost = jnp.where(
         jnp.logical_and(compute_error, iteration >= self.min_iterations),
         state.reg_ot_cost(ot_prob), jnp.inf
     )
     costs = state.costs.at[iteration // self.inner_iterations].set(cost)
-    return state.set(costs=costs)
+    return state.set(costs=costs, criterion=criterion)
 
   def init_state(
       self, ot_prob: linear_problems.LinearProblem,
       init: Tuple[jnp.ndarray, jnp.ndarray, jnp.ndarray]
   ) -> LRSinkhornState:
     """Return the initial state of the loop."""
+    # The `*_prev` factors are seeded below with the initial factors.
     q, r, g = init
     costs = -jnp.ones(self.outer_iterations)
-    return LRSinkhornState(q=q, r=r, g=g, costs=costs)
+    # Criterion starts at zero and the escape counter at one.
+    start_criterion, start_escape = 0.0, 1
+    return LRSinkhornState(
+        q=q,
+        r=r,
+        g=g,
+        q_prev=q,
+        r_prev=r,
+        g_prev=g,
+        gamma=self.gamma,
+        costs=costs,
+        criterion=start_criterion,
+        count_escape=start_escape
+    )
 
   def output_from_state(
       self, ot_prob: linear_problems.LinearProblem, state: LRSinkhornState