diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index dd3010672..dbb3368a5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -12,14 +12,14 @@ repos:
   - id: yapf
     additional_dependencies: [toml]
 - repo: https://github.com/nbQA-dev/nbQA
-  rev: 1.6.1
+  rev: 1.6.3
   hooks:
   - id: nbqa-pyupgrade
     args: [--py38-plus]
   - id: nbqa-black
   - id: nbqa-isort
 - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
-  rev: v2.6.0
+  rev: v2.7.0
   hooks:
   - id: pretty-format-yaml
     args: [--autofix, --indent, '2']
@@ -36,7 +36,7 @@ repos:
   - id: check-case-conflict
 - repo: https://github.com/charliermarsh/ruff-pre-commit
   # Ruff version.
-  rev: v0.0.246
+  rev: v0.0.252
   hooks:
   - id: ruff
     args: [--fix, --exit-non-zero-on-fix]
diff --git a/docs/geometry.rst b/docs/geometry.rst
index f1a38b49c..fe6858b68 100644
--- a/docs/geometry.rst
+++ b/docs/geometry.rst
@@ -64,6 +64,7 @@ Cost Functions
     costs.ElasticL1
     costs.ElasticSTVS
     costs.ElasticSqKOverlap
+    costs.SoftDTW
 
 Utilities
 ---------
diff --git a/docs/references.bib b/docs/references.bib
index 3e8c0a465..4d40c23bb 100644
--- a/docs/references.bib
+++ b/docs/references.bib
@@ -1,786 +1,771 @@
-@InProceedings{vayer:19,
-  title = {Optimal Transport for structured data with application on graphs},
-  author = {Titouan, Vayer and Courty, Nicolas and Tavenard, Romain and Laetitia, Chapel and Flamary, R{\'e}mi},
-  booktitle = {Proceedings of the 36th International Conference on Machine Learning},
-  pages = {6275--6284},
-  year = {2019},
-  editor = {Chaudhuri, Kamalika and Salakhutdinov, Ruslan},
-  volume = {97},
-  series = {Proceedings of Machine Learning Research},
-  month = {09--15 Jun},
+@inproceedings{vayer:19,
+  author    = {Vayer, Titouan and Courty, Nicolas and Tavenard, Romain and Chapel, Laetitia and Flamary, Rémi},
+  editor    = {Chaudhuri, Kamalika and Salakhutdinov, Ruslan},
   publisher = {PMLR},
-  pdf = {http://proceedings.mlr.press/v97/titouan19a/titouan19a.pdf},
-  url = {https://proceedings.mlr.press/v97/titouan19a.html},
+  url       = {https://proceedings.mlr.press/v97/titouan19a.html},
+  booktitle = {Proceedings of the 36th International Conference on Machine Learning},
+  file      = {http://proceedings.mlr.press/v97/titouan19a/titouan19a.pdf},
+  month     = jun,
+  pages     = {6275--6284},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Optimal Transport for structured data with application on graphs},
+  volume    = {97},
+  year      = {2019},
 }
 
 @article{cuturi:15,
-  author = {Cuturi, Marco and Peyr\'{e}, Gabriel},
-  title = {A Smoothed Dual Approach for Variational Wasserstein Problems},
+  author  = {Cuturi, Marco and Peyré, Gabriel},
+  url     = {https://doi.org/10.1137/15M1032600},
+  doi     = {10.1137/15M1032600},
   journal = {SIAM Journal on Imaging Sciences},
-  volume = {9},
-  number = {1},
-  pages = {320-343},
-  year = {2016},
-  doi = {10.1137/15M1032600},
-  URL = {https://doi.org/10.1137/15M1032600}
+  number  = {1},
+  pages   = {320--343},
+  title   = {A Smoothed Dual Approach for Variational Wasserstein Problems},
+  volume  = {9},
+  year    = {2016},
 }
 
-@InProceedings{peyre:16,
-  title = {Gromov-Wasserstein Averaging of Kernel and Distance Matrices},
-  author = {Peyré, Gabriel and Cuturi, Marco and Solomon, Justin},
+@inproceedings{peyre:16,
+  author    = {Peyré, Gabriel and Cuturi, Marco and Solomon, Justin},
+  editor    = {Balcan, Maria Florina and Weinberger, Kilian Q.},
+  location  = {New York, New York, USA},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v48/peyre16.html},
   booktitle = {Proceedings of The 33rd International Conference on Machine Learning},
-  pages = {2664--2672},
-  year = {2016},
-  editor = {Balcan, Maria Florina and Weinberger, Kilian Q.},
-  volume = {48},
-  series = {Proceedings of Machine Learning Research},
-  address = {New York, New York, USA},
-  month = {20--22 Jun},
+  file      = {http://proceedings.mlr.press/v48/peyre16.pdf},
+  month     = jun,
+  pages     = {2664--2672},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Gromov-Wasserstein Averaging of Kernel and Distance Matrices},
+  volume    = {48},
+  year      = {2016},
+}
+
+@inproceedings{feydy:19,
+  author    = {Feydy, Jean and Séjourné, Thibault and Vialard, François-Xavier and Amari, Shun-ichi and Trouvé, Alain and Peyré, Gabriel},
+  editor    = {Chaudhuri, Kamalika and Sugiyama, Masashi},
   publisher = {PMLR},
-  pdf = {http://proceedings.mlr.press/v48/peyre16.pdf},
-  url = {https://proceedings.mlr.press/v48/peyre16.html},
-}
-
-@InProceedings{feydy:19,
-  title = 	 {Interpolating between Optimal Transport and MMD using Sinkhorn Divergences},
-  author =       {Feydy, Jean and S\'{e}journ\'{e}, Thibault and Vialard, Fran\c{c}ois-Xavier and Amari, Shun-ichi and Trouve, Alain and Peyr\'{e}, Gabriel},
-  booktitle = 	 {Proceedings of the Twenty-Second International Conference on Artificial Intelligence and Statistics},
-  pages = 	 {2681--2690},
-  year = 	 {2019},
-  editor = 	 {Chaudhuri, Kamalika and Sugiyama, Masashi},
-  volume = 	 {89},
-  series = 	 {Proceedings of Machine Learning Research},
-  month = 	 {16--18 Apr},
-  publisher =    {PMLR},
-  pdf = 	 {http://proceedings.mlr.press/v89/feydy19a/feydy19a.pdf},
-  url = 	 {https://proceedings.mlr.press/v89/feydy19a.html},
-  abstract = 	 {Comparing probability distributions is a fundamental problem in data sciences. Simple norms and divergences such as the total variation and the relative entropy only compare densities in a point-wise manner and fail to capture the geometric nature of the problem. In sharp contrast, Maximum Mean Discrepancies (MMD) and Optimal Transport distances (OT) are two classes of distances between measures that take into account the geometry of the underlying space and metrize the convergence in law.  This paper studies the Sinkhorn divergences, a family of geometric divergences that interpolates between MMD and OT. Relying on a new notion of geometric entropy, we provide theoretical guarantees for these divergences: positivity, convexity and metrization of the convergence in law. On the practical side, we detail a numerical scheme that enables the large scale application of these divergences for machine learning: on the GPU, gradients of the Sinkhorn loss can be computed for batches of a million samples.}
-}
-
-
-@InProceedings{cuturi:14,
-  title = {Fast Computation of Wasserstein Barycenters},
-  author = {Cuturi, Marco and Doucet, Arnaud},
+  url       = {https://proceedings.mlr.press/v89/feydy19a.html},
+  booktitle = {Proceedings of the Twenty-Second International Conference on Artificial Intelligence and Statistics},
+  file      = {http://proceedings.mlr.press/v89/feydy19a/feydy19a.pdf},
+  month     = apr,
+  pages     = {2681--2690},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Interpolating between Optimal Transport and MMD using Sinkhorn Divergences},
+  volume    = {89},
+  year      = {2019},
+}
+
+@inproceedings{cuturi:14,
+  author    = {Cuturi, Marco and Doucet, Arnaud},
+  editor    = {Xing, Eric P. and Jebara, Tony},
+  location  = {Beijing, China},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v32/cuturi14.html},
   booktitle = {Proceedings of the 31st International Conference on Machine Learning},
-  pages = {685--693},
-  year = {2014},
-  editor = {Xing, Eric P. and Jebara, Tony},
-  volume = {32},
-  number = {2},
-  series = {Proceedings of Machine Learning Research},
-  address = {Bejing, China},
-  month = {22--24 Jun},
-  publisher =  {PMLR},
-  pdf = {http://proceedings.mlr.press/v32/cuturi14.pdf},
-  url = {https://proceedings.mlr.press/v32/cuturi14.html},
-}
-
-@InProceedings{indyk:19,
- title = {Sample-Optimal Low-Rank Approximation of Distance Matrices},
- author = {Indyk, Pitor and Vakilian, Ali and Wagner, Tal and Woodruff, David P},
- booktitle = {Proceedings of the Thirty-Second Conference on Learning Theory},
- pages = {1723--1751},
- year = {2019},
- editor = {Beygelzimer, Alina and Hsu, Daniel},
- volume = {99},
- series = {Proceedings of Machine Learning Research},
- month = {25--28 Jun},
- publisher = {PMLR},
- pdf = {http://proceedings.mlr.press/v99/indyk19a/indyk19a.pdf},
- url = {https://proceedings.mlr.press/v99/indyk19a.html},
-}
-
-@InProceedings{scetbon:21,
- title = {Low-Rank Sinkhorn Factorization},
- author = {Scetbon, Meyer and Cuturi, Marco and Peyr{\'e}, Gabriel},
- booktitle = {Proceedings of the 38th International Conference on Machine Learning},
- pages = {9344--9354},
- year = {2021},
- editor = {Meila, Marina and Zhang, Tong},
- volume = {139},
- series = {Proceedings of Machine Learning Research},
- month = {18--24 Jul},
- publisher = {PMLR},
- pdf = {http://proceedings.mlr.press/v139/scetbon21a/scetbon21a.pdf},
- url = {https://proceedings.mlr.press/v139/scetbon21a.html},
-}
-
-@Article{schiebinger:19,
- author = {Schiebinger, Geoffrey and Shu, Jian and Tabaka, Marcin and Cleary, Brian and Subramanian, Vidya
- and Solomon, Aryeh and Gould, Joshua and Liu, Siyan and Lin, Stacie and Berube, Peter and Lee, Lia and Chen, Jenny
- and Brumbaugh, Justin and Rigollet, Philippe and Hochedlinger, Konrad and Jaenisch, Rudolf
- and Regev, Aviv and Lander, Eric S.},
- title = {Optimal-Transport Analysis of Single-Cell Gene Expression Identifies Developmental Trajectories
- in Reprogramming},
- journal = {Cell},
- year = {2019},
- month = {Feb},
- day = {07},
- publisher = {Elsevier},
- volume = {176},
- number = {4},
- pages = {928-943.e22},
- issn = {0092-8674},
- doi = {10.1016/j.cell.2019.01.006},
-}
-
-@Article{memoli:11,
- author = "M{\'e}moli, Facundo",
- title = "Gromov--Wasserstein Distances and the Metric Approach to Object Matching",
- journal = "Foundations of Computational Mathematics",
- year = "2011",
- month = "Aug",
- day = "01",
- volume = "11",
- number = "4",
- pages = "417--487",
- issn = "1615-3383",
- doi = "10.1007/s10208-011-9093-5",
- url = "https://doi.org/10.1007/s10208-011-9093-5"
-}
-
-@InProceedings{scetbon:22,
- title = {Linear-Time Gromov {W}asserstein Distances using Low Rank Couplings and Costs},
- author = {Scetbon, Meyer and Peyr{\'e}, Gabriel and Cuturi, Marco},
- booktitle = {Proceedings of the 39th International Conference on Machine Learning},
- pages = {19347--19365},
- year = {2022},
- editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
- volume = {162},
- series = {Proceedings of Machine Learning Research},
- month = {17--23 Jul},
- publisher = {PMLR},
- pdf = {https://proceedings.mlr.press/v162/scetbon22b/scetbon22b.pdf},
- url = {https://proceedings.mlr.press/v162/scetbon22b.html},
-}
-
-@Article{vayer:20,
- author = {Vayer, Titouan and Chapel, Laetitia and Flamary, Remi and Tavenard, Romain and Courty, Nicolas},
- title = {Fused Gromov-Wasserstein Distance for Structured Objects},
- journal = {Algorithms},
- volume = {13},
- year = {2020},
- number = {9},
- article-numer = {212},
- url = {https://www.mdpi.com/1999-4893/13/9/212},
- issn = {1999-4893},
- doi = {10.3390/a13090212}
-}
-
-@Article{demetci:22,
-  author = {Demetci, Pinar and Santorella, Rebecca and Sandstede, Bj\"{o}rn and Noble, William Stafford and
-    Singh, Ritambhara},
-  title = {SCOT: Single-Cell Multi-Omics Alignment with Optimal Transport},
+  file      = {http://proceedings.mlr.press/v32/cuturi14.pdf},
+  month     = jun,
+  number    = {2},
+  pages     = {685--693},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Fast Computation of Wasserstein Barycenters},
+  volume    = {32},
+  year      = {2014},
+}
+
+@inproceedings{indyk:19,
+  author    = {Indyk, Piotr and Vakilian, Ali and Wagner, Tal and Woodruff, David P},
+  editor    = {Beygelzimer, Alina and Hsu, Daniel},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v99/indyk19a.html},
+  booktitle = {Proceedings of the Thirty-Second Conference on Learning Theory},
+  file      = {http://proceedings.mlr.press/v99/indyk19a/indyk19a.pdf},
+  month     = jun,
+  pages     = {1723--1751},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Sample-Optimal Low-Rank Approximation of Distance Matrices},
+  volume    = {99},
+  year      = {2019},
+}
+
+@inproceedings{scetbon:21,
+  author    = {Scetbon, Meyer and Cuturi, Marco and Peyré, Gabriel},
+  editor    = {Meila, Marina and Zhang, Tong},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v139/scetbon21a.html},
+  booktitle = {Proceedings of the 38th International Conference on Machine Learning},
+  file      = {http://proceedings.mlr.press/v139/scetbon21a/scetbon21a.pdf},
+  month     = jul,
+  pages     = {9344--9354},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Low-Rank Sinkhorn Factorization},
+  volume    = {139},
+  year      = {2021},
+}
+
+@article{schiebinger:19,
+  author    = {Schiebinger, Geoffrey and Shu, Jian and Tabaka, Marcin and Cleary, Brian and Subramanian, Vidya and Solomon, Aryeh and Gould, Joshua and Liu, Siyan and Lin, Stacie and Berube, Peter and Lee, Lia and Chen, Jenny and Brumbaugh, Justin and Rigollet, Philippe and Hochedlinger, Konrad and Jaenisch, Rudolf and Regev, Aviv and Lander, Eric S.},
+  publisher = {Elsevier},
+  doi       = {10.1016/j.cell.2019.01.006},
+  issn      = {0092-8674},
+  journal   = {Cell},
+  number    = {4},
+  pages     = {928--943.e22},
+  title     = {Optimal-Transport Analysis of Single-Cell Gene Expression Identifies Developmental Trajectories in Reprogramming},
+  volume    = {176},
+  year      = {2019},
+}
+
+@article{memoli:11,
+  author  = {Mémoli, Facundo},
+  url     = {https://doi.org/10.1007/s10208-011-9093-5},
+  doi     = {10.1007/s10208-011-9093-5},
+  issn    = {1615-3383},
+  journal = {Foundations of Computational Mathematics},
+  number  = {4},
+  pages   = {417--487},
+  title   = {Gromov--Wasserstein Distances and the Metric Approach to Object Matching},
+  volume  = {11},
+  year    = {2011},
+}
+
+@inproceedings{scetbon:22,
+  author    = {Scetbon, Meyer and Peyré, Gabriel and Cuturi, Marco},
+  editor    = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v162/scetbon22b.html},
+  booktitle = {Proceedings of the 39th International Conference on Machine Learning},
+  file      = {https://proceedings.mlr.press/v162/scetbon22b/scetbon22b.pdf},
+  month     = jul,
+  pages     = {19347--19365},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Linear-Time Gromov {W}asserstein Distances using Low Rank Couplings and Costs},
+  volume    = {162},
+  year      = {2022},
+}
+
+@article{vayer:20,
+  author  = {Vayer, Titouan and Chapel, Laetitia and Flamary, Rémi and Tavenard, Romain and Courty, Nicolas},
+  url     = {https://www.mdpi.com/1999-4893/13/9/212},
+  doi     = {10.3390/a13090212},
+  issn    = {1999-4893},
+  journal = {Algorithms},
+  number  = {9},
+  title   = {Fused Gromov-Wasserstein Distance for Structured Objects},
+  volume  = {13},
+  year    = {2020},
+}
+
+@article{demetci:22,
+  author  = {Demetci, Pinar and Santorella, Rebecca and Sandstede, Björn and Noble, William Stafford and Singh, Ritambhara},
+  doi     = {10.1089/cmb.2021.0446},
   journal = {Journal of Computational Biology},
-  volume = {29},
-  number = {1},
-  pages = {3-18},
-  year = {2022},
-  doi = {10.1089/cmb.2021.0446},
-  note = {PMID: 35050714},
-}
-
-@Article{chen:19,
- author = "Chen, Song and Lake, Blue B. and Zhang, Kun",
- title = "High-throughput sequencing of the transcriptome and chromatin accessibility in the same cell",
- journal = "Nature Biotechnology",
- year = "2019",
- month = "Dec",
- day = "01",
- volume = "37",
- number = "12",
- pages = "1452--1457",
- issn = "1546-1696",
- doi = "10.1038/s41587-019-0290-0",
- url = "https://doi.org/10.1038/s41587-019-0290-0"
-}
-
-@Misc{richter-powell:21,
- doi = {10.48550/ARXIV.2111.12187},
- url = {https://arxiv.org/abs/2111.12187},
- author = {Richter-Powell, Jack and Lorraine, Jonathan and Amos, Brandon},
- keywords = {Machine Learning (cs.LG), Machine Learning (stat.ML), FOS: Computer and information sciences,
-   FOS: Computer and information sciences},
- title = {Input Convex Gradient Networks},
- publisher = {arXiv},
- year = {2021},
- copyright = {arXiv.org perpetual, non-exclusive license}
+  note    = {PMID: 35050714},
+  number  = {1},
+  pages   = {3--18},
+  title   = {SCOT: Single-Cell Multi-Omics Alignment with Optimal Transport},
+  volume  = {29},
+  year    = {2022},
+}
+
+@article{chen:19,
+  author  = {Chen, Song and Lake, Blue B. and Zhang, Kun},
+  url     = {https://doi.org/10.1038/s41587-019-0290-0},
+  doi     = {10.1038/s41587-019-0290-0},
+  issn    = {1546-1696},
+  journal = {Nature Biotechnology},
+  number  = {12},
+  pages   = {1452--1457},
+  title   = {High-throughput sequencing of the transcriptome and chromatin accessibility in the same cell},
+  volume  = {37},
+  year    = {2019},
+}
+
+@misc{richter-powell:21,
+  author    = {Richter-Powell, Jack and Lorraine, Jonathan and Amos, Brandon},
+  publisher = {arXiv},
+  url       = {https://arxiv.org/abs/2111.12187},
+  doi       = {10.48550/ARXIV.2111.12187},
+  keywords  = {Machine Learning (cs.LG),Machine Learning (stat.ML),FOS: Computer and information sciences},
+  title     = {Input Convex Gradient Networks},
+  year      = {2021},
 }
 
 @inproceedings{bunne:22,
-  title={Supervised Training of Conditional Monge Maps},
-  author={Charlotte Bunne and Andreas Krause and marco cuturi},
-  booktitle={Advances in Neural Information Processing Systems},
-  editor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},
-  year={2022},
-  url={https://openreview.net/forum?id=sPNtVVUq7wi}
-}
-
-@Article{gelbrich:90,
- author = {Gelbrich, Matthias},
- title = {On a Formula for the L2 Wasserstein Metric between Measures on Euclidean and Hilbert Spaces},
- journal = {Mathematische Nachrichten},
- volume = {147},
- number = {1},
- pages = {185-203},
- doi = {https://doi.org/10.1002/mana.19901470121},
- year = {1990}
-}
-
-@InProceedings{amos:17,
- title = {Input Convex Neural Networks},
- author = {Brandon Amos and Lei Xu and J. Zico Kolter},
- booktitle = {Proceedings of the 34th International Conference on Machine Learning},
- pages = {146--155},
- year = {2017},
- editor = {Precup, Doina and Teh, Yee Whye},
- volume = {70},
- series = {Proceedings of Machine Learning Research},
- month = {06--11 Aug},
- publisher = {PMLR},
- pdf = {http://proceedings.mlr.press/v70/amos17b/amos17b.pdf},
- url = {https://proceedings.mlr.press/v70/amos17b.html},
-}
-
-@InProceedings{makkuva:20,
- title = {Optimal transport mapping via input convex neural networks},
- author = {Makkuva, Ashok and Taghvaei, Amirhossein and Oh, Sewoong and Lee, Jason},
- booktitle = {Proceedings of the 37th International Conference on Machine Learning},
- pages = {6672--6681},
- year = {2020},
- editor = {III, Hal Daumé and Singh, Aarti},
- volume = {119},
- series = {Proceedings of Machine Learning Research},
- month = {13--18 Jul},
- publisher = {PMLR},
- pdf = {http://proceedings.mlr.press/v119/makkuva20a/makkuva20a.pdf},
- url = {https://proceedings.mlr.press/v119/makkuva20a.html},
-}
-
-@InProceedings{cuturi:19,
- author = {Cuturi, Marco and Teboul, Olivier and Vert, Jean-Philippe},
- booktitle = {Advances in Neural Information Processing Systems},
- editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and
-   R. Garnett},
- publisher = {Curran Associates, Inc.},
- title = {Differentiable Ranking and Sorting using Optimal Transport},
- url = {https://proceedings.neurips.cc/paper/2019/file/d8c24ca8f23c562a5600876ca2a550ce-Paper.pdf},
- volume = {32},
- year = {2019}
-}
-
-@InProceedings{cuturi:20a,
- title = {Supervised Quantile Normalization for Low Rank Matrix Factorization},
- author = {Cuturi, Marco and Teboul, Olivier and Niles-Weed, Jonathan and Vert, Jean-Philippe},
- booktitle = {Proceedings of the 37th International Conference on Machine Learning},
- pages = {2269--2279},
- year = {2020},
- editor = {III, Hal Daumé and Singh, Aarti},
- volume = {119},
- series = {Proceedings of Machine Learning Research},
- month = {13--18 Jul},
- publisher = {PMLR},
- pdf = {http://proceedings.mlr.press/v119/cuturi20a/cuturi20a.pdf},
- url = {https://proceedings.mlr.press/v119/cuturi20a.html},
-}
-
-@InProceedings{gramfort:15,
- title = {Fast optimal transport averaging of neuroimaging data},
- author = {Gramfort, Alexandre and Peyr{\'e}, Gabriel and Cuturi, Marco},
- booktitle = {International Conference on Information Processing in Medical Imaging},
- pages = {261--272},
- year = {2015},
- organization = {Springer}
-}
-
-@Article{benamou:15,
- author = {Benamou, Jean-David and Carlier, Guillaume and Cuturi, Marco and Nenna, Luca and Peyr\'{e}, Gabriel},
- title = {Iterative Bregman Projections for Regularized Transportation Problems},
- journal = {SIAM Journal on Scientific Computing},
- volume = {37},
- number = {2},
- pages = {A1111-A1138},
- year = {2015},
- doi = {10.1137/141000439},
+  author    = {Bunne, Charlotte and Krause, Andreas and Cuturi, Marco},
+  editor    = {Oh, Alice H. and Agarwal, Alekh and Belgrave, Danielle and Cho, Kyunghyun},
+  url       = {https://openreview.net/forum?id=sPNtVVUq7wi},
+  booktitle = {Advances in Neural Information Processing Systems},
+  title     = {Supervised Training of Conditional Monge Maps},
+  year      = {2022},
+}
+
+@article{gelbrich:90,
+  author  = {Gelbrich, Matthias},
+  doi     = {10.1002/mana.19901470121},
+  journal = {Mathematische Nachrichten},
+  number  = {1},
+  pages   = {185--203},
+  title   = {On a Formula for the L2 Wasserstein Metric between Measures on Euclidean and Hilbert Spaces},
+  volume  = {147},
+  year    = {1990},
+}
+
+@inproceedings{amos:17,
+  author    = {Amos, Brandon and Xu, Lei and Kolter, J. Zico},
+  editor    = {Precup, Doina and Teh, Yee Whye},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v70/amos17b.html},
+  booktitle = {Proceedings of the 34th International Conference on Machine Learning},
+  file      = {http://proceedings.mlr.press/v70/amos17b/amos17b.pdf},
+  month     = aug,
+  pages     = {146--155},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Input Convex Neural Networks},
+  volume    = {70},
+  year      = {2017},
+}
+
+@inproceedings{makkuva:20,
+  author    = {Makkuva, Ashok and Taghvaei, Amirhossein and Oh, Sewoong and Lee, Jason},
+  editor    = {Daumé III, Hal and Singh, Aarti},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v119/makkuva20a.html},
+  booktitle = {Proceedings of the 37th International Conference on Machine Learning},
+  file      = {http://proceedings.mlr.press/v119/makkuva20a/makkuva20a.pdf},
+  month     = jul,
+  pages     = {6672--6681},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Optimal transport mapping via input convex neural networks},
+  volume    = {119},
+  year      = {2020},
+}
+
+@inproceedings{cuturi:19,
+  author    = {Cuturi, Marco and Teboul, Olivier and Vert, Jean-Philippe},
+  editor    = {Wallach, H. and Larochelle, H. and Beygelzimer, A. and d'Alché-Buc, F. and Fox, E. and Garnett, R.},
+  publisher = {Curran Associates, Inc.},
+  url       = {https://proceedings.neurips.cc/paper/2019/file/d8c24ca8f23c562a5600876ca2a550ce-Paper.pdf},
+  booktitle = {Advances in Neural Information Processing Systems},
+  title     = {Differentiable Ranking and Sorting using Optimal Transport},
+  volume    = {32},
+  year      = {2019},
+}
+
+@inproceedings{cuturi:20a,
+  author    = {Cuturi, Marco and Teboul, Olivier and Niles-Weed, Jonathan and Vert, Jean-Philippe},
+  editor    = {Daumé III, Hal and Singh, Aarti},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v119/cuturi20a.html},
+  booktitle = {Proceedings of the 37th International Conference on Machine Learning},
+  file      = {http://proceedings.mlr.press/v119/cuturi20a/cuturi20a.pdf},
+  month     = jul,
+  pages     = {2269--2279},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Supervised Quantile Normalization for Low Rank Matrix Factorization},
+  volume    = {119},
+  year      = {2020},
+}
+
+@inproceedings{gramfort:15,
+  author       = {Gramfort, Alexandre and Peyré, Gabriel and Cuturi, Marco},
+  organization = {Springer},
+  booktitle    = {International Conference on Information Processing in Medical Imaging},
+  pages        = {261--272},
+  title        = {Fast optimal transport averaging of neuroimaging data},
+  year         = {2015},
+}
+
+@article{benamou:15,
+  author  = {Benamou, Jean-David and Carlier, Guillaume and Cuturi, Marco and Nenna, Luca and Peyré, Gabriel},
+  doi     = {10.1137/141000439},
+  journal = {SIAM Journal on Scientific Computing},
+  number  = {2},
+  pages   = {A1111--A1138},
+  title   = {Iterative Bregman Projections for Regularized Transportation Problems},
+  volume  = {37},
+  year    = {2015},
 }
 
 @article{brenier:91,
-  title={Polar factorization and monotone rearrangement of vector-valued functions},
-  author={Brenier, Yann},
-  journal={Communications on pure and applied mathematics},
-  volume={44},
-  number={4},
-  pages={375--417},
-  year={1991},
-  publisher={Wiley Online Library}
-}
-
-@InProceedings{cuturi:13,
- author = {Cuturi, Marco},
- booktitle = {Advances in Neural Information Processing Systems},
- editor = {C.J. Burges and L. Bottou and M. Welling and Z. Ghahramani and K.Q. Weinberger},
- publisher = {Curran Associates, Inc.},
- title = {Sinkhorn Distances: Lightspeed Computation of Optimal Transport},
- url = {https://proceedings.neurips.cc/paper/2013/file/af21d0c97db2e27e13572cbf59eb343d-Paper.pdf},
- volume = {26},
- year = {2013}
-}
-
-@Article{peyre:19,
- author = {Gabriel Peyré and Marco Cuturi},
- url = {http://dx.doi.org/10.1561/2200000073},
- year = {2019},
- volume = {11},
- journal = {Foundations and Trends® in Machine Learning},
- title = {Computational Optimal Transport: With Applications to Data Science},
- doi = {10.1561/2200000073},
- issn = {1935-8237},
- number = {5-6},
- pages = {355-607},
-}
-
-@Article{solomon:15,
- author = {Solomon, Justin and de Goes, Fernando and Peyr\'{e}, Gabriel and Cuturi, Marco and Butscher, Adrian and
-  Nguyen, Andy and Du, Tao and Guibas, Leonidas},
- title = {Convolutional Wasserstein Distances: Efficient Optimal Transportation on Geometric Domains},
- year = {2015},
- issue_date = {August 2015},
- publisher = {Association for Computing Machinery},
- address = {New York, NY, USA},
- volume = {34},
- number = {4},
- issn = {0730-0301},
- url = {https://doi.org/10.1145/2766963},
- doi = {10.1145/2766963},
- journal = {ACM Trans. Graph.},
- month = {jul},
- articleno = {66},
- numpages = {11},
- keywords = {entropy, wasserstein distances, optimal transportation, displacement interpolation}
-}
-
-@InProceedings{genevay:18,
- title = {Learning Generative Models with Sinkhorn Divergences},
- author = {Genevay, Aude and Peyre, Gabriel and Cuturi, Marco},
- booktitle = {Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics},
- pages = {1608--1617},
- year = {2018},
- editor = {Storkey, Amos and Perez-Cruz, Fernando},
- volume = {84},
- series = {Proceedings of Machine Learning Research},
- month = {09--11 Apr},
- publisher = {PMLR},
- pdf = {http://proceedings.mlr.press/v84/genevay18a/genevay18a.pdf},
- url = {https://proceedings.mlr.press/v84/genevay18a.html},
-}
-
-@Misc{sejourne:19,
- doi = {10.48550/ARXIV.1910.12958},
- url = {https://arxiv.org/abs/1910.12958},
- author = {Séjourné, Thibault and Feydy, Jean and Vialard, François-Xavier and Trouvé, Alain and Peyré, Gabriel},
- keywords = {Optimization and Control (math.OC), Machine Learning (cs.LG), Machine Learning (stat.ML),
- FOS: Mathematics, FOS: Mathematics, FOS: Computer and information sciences, FOS: Computer and information sciences},
- title = {Sinkhorn Divergences for Unbalanced Optimal Transport},
- publisher = {arXiv},
- year = {2019},
- copyright = {arXiv.org perpetual, non-exclusive license}
-}
-
-@Article{janati:20,
- title = {Entropic optimal transport between unbalanced Gaussian measures has a closed form},
- author = {Janati, Hicham and Muzellec, Boris and Peyr{\'e}, Gabriel and Cuturi, Marco},
- journal = {Advances in neural information processing systems},
- volume = {33},
- pages = {10468--10479},
- year = {2020}
-}
-
-@Article{chen:19a,
- author = {Chen, Yongxin and Georgiou, Tryphon T. and Tannenbaum, Allen},
- journal = {IEEE Access},
- title = {Optimal Transport for Gaussian Mixture Models},
- year = {2019},
- volume = {7},
- pages = {6269-6278},
- doi = {10.1109/ACCESS.2018.2889838}
-}
-
-@Article{delon:20,
- author = {Delon, Julie and Desolneux, Agn\`{e}s},
- title = {A Wasserstein-Type Distance in the Space of Gaussian Mixture Models},
- journal = {SIAM Journal on Imaging Sciences},
- volume = {13},
- number = {2},
- pages = {936-970},
- year = {2020},
- doi = {10.1137/19M1301047},
-}
-
-@InProceedings{janati:20a,
- title = {Debiased {S}inkhorn barycenters},
- author =  {Janati, Hicham and Cuturi, Marco and Gramfort, Alexandre},
- booktitle = {Proceedings of the 37th International Conference on Machine Learning},
- pages = {4692--4701},
- year = {2020},
- editor = {III, Hal Daumé and Singh, Aarti},
- volume = {119},
- series = {Proceedings of Machine Learning Research},
- month = {13--18 Jul},
- publisher = {PMLR},
- pdf = {http://proceedings.mlr.press/v119/janati20a/janati20a.pdf},
- url = {https://proceedings.mlr.press/v119/janati20a.html},
-}
-
-@Article{schmitz:18,
- author = {Schmitz, Morgan A. and Heitz, Matthieu and Bonneel, Nicolas and Ngol\`{e}, Fred and Coeurjolly, David and
-  Cuturi, Marco and Peyr\'{e}, Gabriel and Starck, Jean-Luc},
- title = {Wasserstein Dictionary Learning: Optimal Transport-Based Unsupervised Nonlinear Dictionary Learning},
- journal = {SIAM Journal on Imaging Sciences},
- volume = {11},
- number = {1},
- pages = {643-678},
- year = {2018},
- doi = {10.1137/17M1140431},
-}
-
-@Article{alvarez-esteban:16,
- title = {A fixed-point approach to barycenters in Wasserstein space},
- journal = {Journal of Mathematical Analysis and Applications},
- volume = {441},
- number = {2},
- pages = {744-762},
- year = {2016},
- issn = {0022-247X},
- doi = {https://doi.org/10.1016/j.jmaa.2016.04.045},
- url = {https://www.sciencedirect.com/science/article/pii/S0022247X16300907},
- author = {Pedro C. Álvarez-Esteban and E. {del Barrio} and J.A. Cuesta-Albertos and C. Matrán},
- keywords = {Mass transportation problem, -Wasserstein distance, Wasserstein barycenter, Fréchet mean,
- Fixed-point iteration, Location-scatter families},
-}
-
-@Article{lehmann:21,
- author = "Lehmann, Tobias and von Renesse, Max-K. and Sambale, Alexander and Uschmajew, Andr{\'e}",
- title = "A note on overrelaxation in the Sinkhorn algorithm",
- journal = "Optimization Letters",
- year = "2021",
- month = "Dec",
- day = "14",
- issn = "1862-4480",
- doi = "10.1007/s11590-021-01830-0",
- url = "https://doi.org/10.1007/s11590-021-01830-0"
+  author    = {Brenier, Yann},
+  publisher = {Wiley Online Library},
+  journal   = {Communications on pure and applied mathematics},
+  number    = {4},
+  pages     = {375--417},
+  title     = {Polar factorization and monotone rearrangement of vector-valued functions},
+  volume    = {44},
+  year      = {1991},
+}
+
+@inproceedings{cuturi:13,
+  author    = {Cuturi, Marco},
+  editor    = {Burges, C.J. and Bottou, L. and Welling, M. and Ghahramani, Z. and Weinberger, K.Q.},
+  publisher = {Curran Associates, Inc.},
+  url       = {https://proceedings.neurips.cc/paper/2013/file/af21d0c97db2e27e13572cbf59eb343d-Paper.pdf},
+  booktitle = {Advances in Neural Information Processing Systems},
+  title     = {Sinkhorn Distances: Lightspeed Computation of Optimal Transport},
+  volume    = {26},
+  year      = {2013},
+}
+
+@article{peyre:19,
+  author  = {Peyré, Gabriel and Cuturi, Marco},
+  url     = {http://dx.doi.org/10.1561/2200000073},
+  doi     = {10.1561/2200000073},
+  issn    = {1935-8237},
+  journal = {Foundations and Trends® in Machine Learning},
+  number  = {5-6},
+  pages   = {355--607},
+  title   = {Computational Optimal Transport: With Applications to Data Science},
+  volume  = {11},
+  year    = {2019},
+}
+
+@article{solomon:15,
+  author    = {Solomon, Justin and de Goes, Fernando and Peyré, Gabriel and Cuturi, Marco and Butscher, Adrian and Nguyen, Andy and Du, Tao and Guibas, Leonidas},
+  location  = {New York, NY, USA},
+  publisher = {Association for Computing Machinery},
+  url       = {https://doi.org/10.1145/2766963},
+  doi       = {10.1145/2766963},
+  issn      = {0730-0301},
+  journal   = {ACM Trans. Graph.},
+  keywords  = {entropy,wasserstein distances,optimal transportation,displacement interpolation},
+  month     = jul,
+  number    = {4},
+  title     = {Convolutional Wasserstein Distances: Efficient Optimal Transportation on Geometric Domains},
+  volume    = {34},
+  year      = {2015},
+}
+
+@inproceedings{genevay:18,
+  author    = {Genevay, Aude and Peyré, Gabriel and Cuturi, Marco},
+  editor    = {Storkey, Amos and Perez-Cruz, Fernando},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v84/genevay18a.html},
+  booktitle = {Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics},
+  file      = {http://proceedings.mlr.press/v84/genevay18a/genevay18a.pdf},
+  month     = apr,
+  pages     = {1608--1617},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Learning Generative Models with Sinkhorn Divergences},
+  volume    = {84},
+  year      = {2018},
+}
+
+@misc{sejourne:19,
+  author    = {Séjourné, Thibault and Feydy, Jean and Vialard, François-Xavier and Trouvé, Alain and Peyré, Gabriel},
+  publisher = {arXiv},
+  url       = {https://arxiv.org/abs/1910.12958},
+  doi       = {10.48550/ARXIV.1910.12958},
+  keywords  = {Optimization and Control (math.OC),Machine Learning (cs.LG),Machine Learning (stat.ML),FOS: Mathematics,FOS: Mathematics,FOS: Computer and information sciences,FOS: Computer and information sciences},
+  title     = {Sinkhorn Divergences for Unbalanced Optimal Transport},
+  year      = {2019},
+}
+
+@article{janati:20,
+  author  = {Janati, Hicham and Muzellec, Boris and Peyré, Gabriel and Cuturi, Marco},
+  journal = {Advances in neural information processing systems},
+  pages   = {10468--10479},
+  title   = {Entropic optimal transport between unbalanced Gaussian measures has a closed form},
+  volume  = {33},
+  year    = {2020},
+}
+
+@article{chen:19a,
+  author  = {Chen, Yongxin and Georgiou, Tryphon T. and Tannenbaum, Allen},
+  doi     = {10.1109/ACCESS.2018.2889838},
+  journal = {IEEE Access},
+  pages   = {6269--6278},
+  title   = {Optimal Transport for Gaussian Mixture Models},
+  volume  = {7},
+  year    = {2019},
+}
+
+@article{delon:20,
+  author  = {Delon, Julie and Desolneux, Agnès},
+  doi     = {10.1137/19M1301047},
+  journal = {SIAM Journal on Imaging Sciences},
+  number  = {2},
+  pages   = {936--970},
+  title   = {A Wasserstein-Type Distance in the Space of Gaussian Mixture Models},
+  volume  = {13},
+  year    = {2020},
+}
+
+@inproceedings{janati:20a,
+  author    = {Janati, Hicham and Cuturi, Marco and Gramfort, Alexandre},
+  editor    = {Daumé III, Hal and Singh, Aarti},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v119/janati20a.html},
+  booktitle = {Proceedings of the 37th International Conference on Machine Learning},
+  file      = {http://proceedings.mlr.press/v119/janati20a/janati20a.pdf},
+  month     = jul,
+  pages     = {4692--4701},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Debiased {S}inkhorn barycenters},
+  volume    = {119},
+  year      = {2020},
+}
+
+@article{schmitz:18,
+  author  = {Schmitz, Morgan A. and Heitz, Matthieu and Bonneel, Nicolas and Ngolè, Fred and Coeurjolly, David and Cuturi, Marco and Peyré, Gabriel and Starck, Jean-Luc},
+  doi     = {10.1137/17M1140431},
+  journal = {SIAM Journal on Imaging Sciences},
+  number  = {1},
+  pages   = {643--678},
+  title   = {Wasserstein Dictionary Learning: Optimal Transport-Based Unsupervised Nonlinear Dictionary Learning},
+  volume  = {11},
+  year    = {2018},
+}
+
+@article{alvarez-esteban:16,
+  author   = {Álvarez-Esteban, Pedro C. and {del Barrio}, E. and Cuesta-Albertos, J.A. and Matrán, C.},
+  url      = {https://www.sciencedirect.com/science/article/pii/S0022247X16300907},
+  doi      = {10.1016/j.jmaa.2016.04.045},
+  issn     = {0022-247X},
+  journal  = {Journal of Mathematical Analysis and Applications},
+  keywords = {Mass transportation problem,L2-Wasserstein distance,Wasserstein barycenter,Fréchet mean,Fixed-point iteration,Location-scatter families},
+  number   = {2},
+  pages    = {744--762},
+  title    = {A fixed-point approach to barycenters in Wasserstein space},
+  volume   = {441},
+  year     = {2016},
+}
+
+@article{lehmann:21,
+  author  = {Lehmann, Tobias and von Renesse, Max-K. and Sambale, Alexander and Uschmajew, André},
+  url     = {https://doi.org/10.1007/s11590-021-01830-0},
+  doi     = {10.1007/s11590-021-01830-0},
+  issn    = {1862-4480},
+  journal = {Optimization Letters},
+  title   = {A note on overrelaxation in the Sinkhorn algorithm},
+  year    = {2021},
 }
 
 @inproceedings{sejourne:21,
- author = {Sejourne, Thibault and Vialard, Francois-Xavier and Peyr\'{e}, Gabriel},
- booktitle = {Advances in Neural Information Processing Systems},
- editor = {M. Ranzato and A. Beygelzimer and Y. Dauphin and P.S. Liang and J. Wortman Vaughan},
- pages = {8766--8779},
- publisher = {Curran Associates, Inc.},
- title = {The Unbalanced Gromov Wasserstein Distance: Conic Formulation and Relaxation},
- url = {https://proceedings.neurips.cc/paper/2021/file/4990974d150d0de5e6e15a1454fe6b0f-Paper.pdf},
- volume = {34},
- year = {2021}
+  author    = {Séjourné, Thibault and Vialard, François-Xavier and Peyré, Gabriel},
+  editor    = {Ranzato, M. and Beygelzimer, A. and Dauphin, Y. and Liang, P.S. and Vaughan, J. Wortman},
+  publisher = {Curran Associates, Inc.},
+  url       = {https://proceedings.neurips.cc/paper/2021/file/4990974d150d0de5e6e15a1454fe6b0f-Paper.pdf},
+  booktitle = {Advances in Neural Information Processing Systems},
+  pages     = {8766--8779},
+  title     = {The Unbalanced Gromov Wasserstein Distance: Conic Formulation and Relaxation},
+  volume    = {34},
+  year      = {2021},
 }
 
 @inproceedings{chizat:20,
- author = {Chizat, L\'{e}na\"{\i}c and Roussillon, Pierre and L\'{e}ger, Flavien and Vialard,
-  Fran\c{c}ois-Xavier and Peyr\'{e}, Gabriel},
- booktitle = {Advances in Neural Information Processing Systems},
- editor = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
- pages = {2257--2269},
- publisher = {Curran Associates, Inc.},
- title = {Faster Wasserstein Distance Estimation with the Sinkhorn Divergence},
- url = {https://proceedings.neurips.cc/paper/2020/file/17f98ddf040204eda0af36a108cbdea4-Paper.pdf},
- volume = {33},
- year = {2020}
-}
-
-@Article{higham:97,
- author = "Higham, Nicholas J.",
- title = "Stable iterations for the matrix square root",
- journal = "Numerical Algorithms",
- year = "1997",
- month = "Sep",
- day = "01",
- volume = "15",
- number = "2",
- pages = "227--242",
- issn = "1572-9265",
- doi = "10.1023/A:1019150005407",
- url = "https://doi.org/10.1023/A:1019150005407"
-}
-
-@Article{lloyd:82,
- author = {Lloyd, S.},
- journal = {IEEE Transactions on Information Theory},
- title = {Least squares quantization in PCM},
- year = {1982},
- volume = {28},
- number = {2},
- pages = {129-137},
- doi = {10.1109/TIT.1982.1056489}
+  author    = {Chizat, Lénaïc and Roussillon, Pierre and Léger, Flavien and Vialard, François-Xavier and Peyré, Gabriel},
+  editor    = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M.F. and Lin, H.},
+  publisher = {Curran Associates, Inc.},
+  url       = {https://proceedings.neurips.cc/paper/2020/file/17f98ddf040204eda0af36a108cbdea4-Paper.pdf},
+  booktitle = {Advances in Neural Information Processing Systems},
+  pages     = {2257--2269},
+  title     = {Faster Wasserstein Distance Estimation with the Sinkhorn Divergence},
+  volume    = {33},
+  year      = {2020},
+}
+
+@article{higham:97,
+  author  = {Higham, Nicholas J.},
+  url     = {https://doi.org/10.1023/A:1019150005407},
+  doi     = {10.1023/A:1019150005407},
+  issn    = {1572-9265},
+  journal = {Numerical Algorithms},
+  number  = {2},
+  pages   = {227--242},
+  title   = {Stable iterations for the matrix square root},
+  volume  = {15},
+  year    = {1997},
+}
+
+@article{lloyd:82,
+  author  = {Lloyd, S.},
+  doi     = {10.1109/TIT.1982.1056489},
+  journal = {IEEE Transactions on Information Theory},
+  number  = {2},
+  pages   = {129--137},
+  title   = {Least squares quantization in PCM},
+  volume  = {28},
+  year    = {1982},
 }
 
 @inproceedings{arthur:07,
- author = {Arthur, David and Vassilvitskii, Sergei},
- title = {K-Means++: The Advantages of Careful Seeding},
- year = {2007},
- isbn = {9780898716245},
- publisher = {Society for Industrial and Applied Mathematics},
- address = {USA},
- booktitle = {Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms},
- pages = {1027–1035},
- numpages = {9},
- location = {New Orleans, Louisiana},
- series = {SODA '07}
+  author    = {Arthur, David and Vassilvitskii, Sergei},
+  location  = {New Orleans, Louisiana},
+  publisher = {Society for Industrial and Applied Mathematics},
+  booktitle = {Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms},
+  isbn      = {9780898716245},
+  pages     = {1027--1035},
+  series    = {SODA '07},
+  title     = {K-Means++: The Advantages of Careful Seeding},
+  year      = {2007},
 }
 
 @article{thornton2022rethinking:22,
-  title = {Rethinking Initialization of the Sinkhorn Algorithm},
-  author = {Thornton, James and Cuturi, Marco},
+  author  = {Thornton, James and Cuturi, Marco},
   journal = {arXiv preprint arXiv:2206.07630},
-  year = {2022}
+  title   = {Rethinking Initialization of the Sinkhorn Algorithm},
+  year    = {2022},
 }
 
-@Article{heitz:21,
- author = "Heitz, Matthieu and Bonneel, Nicolas and Coeurjolly, David and Cuturi, Marco and Peyr{\'e}, Gabriel",
- title = "Ground Metric Learning on Graphs",
- journal = "Journal of Mathematical Imaging and Vision",
- year = "2021",
- month = "Jan",
- day = "01",
- volume = "63",
- number = "1",
- pages = "89--107",
- issn = "1573-7683",
- doi = "10.1007/s10851-020-00996-z",
- url = "https://doi.org/10.1007/s10851-020-00996-z"
+@article{heitz:21,
+  author  = {Heitz, Matthieu and Bonneel, Nicolas and Coeurjolly, David and Cuturi, Marco and Peyré, Gabriel},
+  url     = {https://doi.org/10.1007/s10851-020-00996-z},
+  doi     = {10.1007/s10851-020-00996-z},
+  issn    = {1573-7683},
+  journal = {Journal of Mathematical Imaging and Vision},
+  number  = {1},
+  pages   = {89--107},
+  title   = {Ground Metric Learning on Graphs},
+  volume  = {63},
+  year    = {2021},
 }
 
 @article{santambrogio:15,
-  title={Optimal transport for applied mathematicians},
-  author={Santambrogio, Filippo},
-  journal={Birk{\"a}user, NY},
-  volume={55},
-  number={58-63},
-  pages={94},
-  year={2015},
-  publisher={Springer}
+  author    = {Santambrogio, Filippo},
+  publisher = {Springer},
+  journal   = {Birkhäuser, NY},
+  number    = {58-63},
+  pages     = {94},
+  title     = {Optimal transport for applied mathematicians},
+  volume    = {55},
+  year      = {2015},
 }
 
 @article{cholmod:08,
- author = {Chen, Yanqing and Davis, Timothy A. and Hager, William W. and Rajamanickam, Sivasankaran},
- title = {Algorithm 887: CHOLMOD, Supernodal Sparse Cholesky Factorization and Update/Downdate},
- year = {2008},
- issue_date = {October 2008},
- publisher = {Association for Computing Machinery},
- address = {New York, NY, USA},
- volume = {35},
- number = {3},
- issn = {0098-3500},
- url = {https://doi.org/10.1145/1391989.1391995},
- doi = {10.1145/1391989.1391995},
- journal = {ACM Trans. Math. Softw.},
- month = {oct},
- articleno = {22},
- numpages = {14},
- keywords = {sparse matrices, Cholesky factorization, linear equations}
+  author    = {Chen, Yanqing and Davis, Timothy A. and Hager, William W. and Rajamanickam, Sivasankaran},
+  location  = {New York, NY, USA},
+  publisher = {Association for Computing Machinery},
+  url       = {https://doi.org/10.1145/1391989.1391995},
+  doi       = {10.1145/1391989.1391995},
+  issn      = {0098-3500},
+  journal   = {ACM Trans. Math. Softw.},
+  keywords  = {sparse matrices,Cholesky factorization,linear equations},
+  month     = oct,
+  number    = {3},
+  title     = {Algorithm 887: CHOLMOD, Supernodal Sparse Cholesky Factorization and Update/Downdate},
+  volume    = {35},
+  year      = {2008},
 }
 
 @article{crane:13,
- author = {Crane, Keenan and Weischedel, Clarisse and Wardetzky, Max},
- title = {Geodesics in Heat: A New Approach to Computing Distance Based on Heat Flow},
- year = {2013},
- issue_date = {September 2013},
- publisher = {Association for Computing Machinery},
- address = {New York, NY, USA},
- volume = {32},
- number = {5},
- issn = {0730-0301},
- url = {https://doi.org/10.1145/2516971.2516977},
- doi = {10.1145/2516971.2516977},
- journal = {ACM Trans. Graph.},
- month = {oct},
- articleno = {152},
- numpages = {11},
- keywords = {heat kernel, discrete differential geometry, geodesic distance, Digital geometry processing,
-  distance transform}
+  author    = {Crane, Keenan and Weischedel, Clarisse and Wardetzky, Max},
+  location  = {New York, NY, USA},
+  publisher = {Association for Computing Machinery},
+  url       = {https://doi.org/10.1145/2516971.2516977},
+  doi       = {10.1145/2516971.2516977},
+  issn      = {0730-0301},
+  journal   = {ACM Trans. Graph.},
+  keywords  = {heat kernel,discrete differential geometry,geodesic distance,Digital geometry processing,distance transform},
+  month     = oct,
+  number    = {5},
+  title     = {Geodesics in Heat: A New Approach to Computing Distance Based on Heat Flow},
+  volume    = {32},
+  year      = {2013},
 }
 
 @misc{scetbon:22b,
-  doi = {10.48550/ARXIV.2205.12365},
-  url = {https://arxiv.org/abs/2205.12365},
-  author = {Scetbon, Meyer and Cuturi, Marco},
-  keywords = {Machine Learning (stat.ML), Machine Learning (cs.LG), FOS: Computer and information sciences,
-   FOS: Computer and information sciences},
-  title = {Low-rank Optimal Transport: Approximation, Statistics and Debiasing},
+  author    = {Scetbon, Meyer and Cuturi, Marco},
   publisher = {arXiv},
-  year = {2022},
-  copyright = {Creative Commons Attribution 4.0 International}
+  url       = {https://arxiv.org/abs/2205.12365},
+  doi       = {10.48550/ARXIV.2205.12365},
+  keywords  = {Machine Learning (stat.ML),Machine Learning (cs.LG),FOS: Computer and information sciences,FOS: Computer and information sciences},
+  title     = {Low-rank Optimal Transport: Approximation, Statistics and Debiasing},
+  year      = {2022},
 }
 
 @misc{pooladian:21,
-  doi = {10.48550/ARXIV.2109.12004},
-  url = {https://arxiv.org/abs/2109.12004},
-  author = {Pooladian, Aram-Alexandre and Niles-Weed, Jonathan},
-  keywords = {Statistics Theory (math.ST), Machine Learning (stat.ML), FOS: Mathematics, FOS: Mathematics,
-   FOS: Computer and information sciences, FOS: Computer and information sciences, 62G05},
-  title = {Entropic estimation of optimal transport maps},
+  author    = {Pooladian, Aram-Alexandre and Niles-Weed, Jonathan},
   publisher = {arXiv},
-  year = {2021},
-  copyright = {arXiv.org perpetual, non-exclusive license}
+  url       = {https://arxiv.org/abs/2109.12004},
+  doi       = {10.48550/ARXIV.2109.12004},
+  keywords  = {Statistics Theory (math.ST),Machine Learning (stat.ML),FOS: Mathematics,FOS: Mathematics,FOS: Computer and information sciences,FOS: Computer and information sciences,62G05},
+  title     = {Entropic estimation of optimal transport maps},
+  year      = {2021},
 }
 
-@article{amos:22,
-  title={Meta Optimal Transport},
-  author={Amos, Brandon and Cohen, Samuel and Luise, Giulia and Redko, Ievgen},
-  journal={arXiv preprint arXiv:2206.05262},
-  year={2022}
+@misc{amos:22,
+  author    = {Amos, Brandon and Cohen, Samuel and Luise, Giulia and Redko, Ievgen},
+  publisher = {arXiv},
+  url       = {https://arxiv.org/abs/2206.05262},
+  doi       = {10.48550/ARXIV.2206.05262},
+  keywords  = {Machine Learning (cs.LG),Artificial Intelligence (cs.AI),Machine Learning (stat.ML),FOS: Computer and information sciences,FOS: Computer and information sciences},
+  title     = {Meta Optimal Transport},
+  year      = {2022},
 }
 
 @inproceedings{korotin:21,
-  title={Wasserstein-2 Generative Networks},
-  author={Alexander Korotin and Vage Egiazarian and Arip Asadulaev and Alexander Safin and Evgeny Burnaev},
-  booktitle={International Conference on Learning Representations},
-  year={2021},
-  url={https://openreview.net/forum?id=bEoxzW_EXsa}
+  author    = {Korotin, Alexander and Egiazarian, Vage and Asadulaev, Arip and Safin, Alexander and Burnaev, Evgeny},
+  url       = {https://openreview.net/forum?id=bEoxzW_EXsa},
+  booktitle = {International Conference on Learning Representations},
+  title     = {Wasserstein-2 Generative Networks},
+  year      = {2021},
 }
 
 @book{boyd:04,
-  title={Convex optimization},
-  author={Boyd, Stephen and Boyd, Stephen P and Vandenberghe, Lieven},
-  year={2004},
-  url={https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf},
-  publisher={Cambridge university press}
+  author    = {Boyd, Stephen P and Vandenberghe, Lieven},
+  publisher = {Cambridge university press},
+  url       = {https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf},
+  title     = {Convex optimization},
+  year      = {2004},
 }
 
 @misc{pooladian:22,
-  doi = {10.48550/ARXIV.2202.08919},
-  url = {https://arxiv.org/abs/2202.08919},
-  author = {Pooladian, Aram-Alexandre and Cuturi, Marco and Niles-Weed, Jonathan},
-  keywords = {Optimization and Control (math.OC), Statistics Theory (math.ST), FOS: Mathematics, FOS: Mathematics},
-  title = {Debiaser Beware: Pitfalls of Centering Regularized Transport Maps},
+  author    = {Pooladian, Aram-Alexandre and Cuturi, Marco and Niles-Weed, Jonathan},
   publisher = {arXiv},
-  year = {2022},
-  copyright = {Creative Commons Attribution 4.0 International}
+  url       = {https://arxiv.org/abs/2202.08919},
+  doi       = {10.48550/ARXIV.2202.08919},
+  keywords  = {Optimization and Control (math.OC),Statistics Theory (math.ST),FOS: Mathematics,FOS: Mathematics},
+  title     = {Debiaser Beware: Pitfalls of Centering Regularized Transport Maps},
+  year      = {2022},
 }
 
-@InProceedings{sejourne:22,
-  title = { Faster Unbalanced Optimal Transport: Translation invariant Sinkhorn and 1-D Frank-Wolfe },
-  author = {Sejourne, Thibault and Vialard, Francois-Xavier and Peyr\'e, Gabriel},
-  booktitle = {Proceedings of The 25th International Conference on Artificial Intelligence and Statistics},
-  pages = {4995--5021},
-  year = {2022},
-  editor = {Camps-Valls, Gustau and Ruiz, Francisco J. R. and Valera, Isabel},
-  volume = {151},
-  series = {Proceedings of Machine Learning Research},
-  month = {28--30 Mar},
+@inproceedings{sejourne:22,
+  author    = {Séjourné, Thibault and Vialard, François-Xavier and Peyré, Gabriel},
+  editor    = {Camps-Valls, Gustau and Ruiz, Francisco J. R. and Valera, Isabel},
   publisher = {PMLR},
-  url = {https://proceedings.mlr.press/v151/sejourne22a/sejourne22a.pdf},
-}
-
-@Article{thibault:21,
-  author = {Thibault, Alexis and Chizat, Lénaïc and Dossal, Charles and Papadakis, Nicolas},
-  title = {Overrelaxed Sinkhorn–Knopp Algorithm for Regularized Optimal Transport},
+  url       = {https://proceedings.mlr.press/v151/sejourne22a/sejourne22a.pdf},
+  booktitle = {Proceedings of The 25th International Conference on Artificial Intelligence and Statistics},
+  month     = mar,
+  pages     = {4995--5021},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Faster Unbalanced Optimal Transport: Translation invariant Sinkhorn and 1-D Frank-Wolfe},
+  volume    = {151},
+  year      = {2022},
+}
+
+@article{thibault:21,
+  author  = {Thibault, Alexis and Chizat, Lénaïc and Dossal, Charles and Papadakis, Nicolas},
+  doi     = {10.3390/a14050143},
+  issn    = {1999-4893},
   journal = {Algorithms},
-  volume = {14},
-  year = {2021},
-  number = {5},
-  article-number = {143},
-  issn = {1999-4893},
-  doi = {10.3390/a14050143}
-}
-
-@InProceedings{chen:16,
-  author="Chen, Yukun and Ye, Jianbo and Li, Jia",
-  editor="Leibe, Bastian and Matas, Jiri and Sebe, Nicu and Welling, Max",
-  title="A Distance for HMMs Based on Aggregated Wasserstein Metric and State Registration",
-  booktitle="Computer Vision -- ECCV 2016",
-  year="2016",
-  publisher="Springer International Publishing",
-  address="Cham",
-  pages="451--466",
-  isbn="978-3-319-46466-4"
-}
-
-@ARTICLE{chen:20,
-  author={Chen, Yukun and Ye, Jianbo and Li, Jia},
-  journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
-  title={Aggregated Wasserstein Distance and State Registration for Hidden Markov Models},
-  year={2020},
-  volume={42},
-  number={9},
-  pages={2133-2147},
-  doi={10.1109/TPAMI.2019.2908635}
+  number  = {5},
+  title   = {Overrelaxed Sinkhorn–Knopp Algorithm for Regularized Optimal Transport},
+  volume  = {14},
+  year    = {2021},
+}
+
+@inproceedings{chen:16,
+  author    = {Chen, Yukun and Ye, Jianbo and Li, Jia},
+  editor    = {Leibe, Bastian and Matas, Jiri and Sebe, Nicu and Welling, Max},
+  location  = {Cham},
+  publisher = {Springer International Publishing},
+  booktitle = {Computer Vision -- ECCV 2016},
+  isbn      = {978-3-319-46466-4},
+  pages     = {451--466},
+  title     = {A Distance for HMMs Based on Aggregated Wasserstein Metric and State Registration},
+  year      = {2016},
+}
+
+@article{chen:20,
+  author  = {Chen, Yukun and Ye, Jianbo and Li, Jia},
+  doi     = {10.1109/TPAMI.2019.2908635},
+  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+  number  = {9},
+  pages   = {2133--2147},
+  title   = {Aggregated Wasserstein Distance and State Registration for Hidden Markov Models},
+  volume  = {42},
+  year    = {2020},
 }
 
 @inproceedings{amos:23,
-  title={On amortizing convex conjugates for optimal transport},
-  author={Amos, Brandon},
-  booktitle={International Conference on Learning Representations},
-  year={2023},
-  url={https://arxiv.org/abs/2210.12153}
+  author    = {Amos, Brandon},
+  url       = {https://arxiv.org/abs/2210.12153},
+  booktitle = {International Conference on Learning Representations},
+  title     = {On amortizing convex conjugates for optimal transport},
+  year      = {2023},
 }
 
-@ARTICLE{schreck:15,
-  author={Schreck, Amandine and Fort, Gersende and Le Corff, Sylvain and Moulines, Eric},
-  journal={IEEE Journal of Selected Topics in Signal Processing},
-  title={A Shrinkage-Thresholding Metropolis Adjusted Langevin Algorithm for Bayesian Variable Selection},
-  year={2016},
-  volume={10},
-  number={2},
-  pages={366-375},
-  doi={10.1109/JSTSP.2015.2496546}
+@article{schreck:15,
+  author  = {Schreck, Amandine and Fort, Gersende and Le Corff, Sylvain and Moulines, Eric},
+  doi     = {10.1109/JSTSP.2015.2496546},
+  journal = {IEEE Journal of Selected Topics in Signal Processing},
+  number  = {2},
+  pages   = {366--375},
+  title   = {A Shrinkage-Thresholding Metropolis Adjusted Langevin Algorithm for Bayesian Variable Selection},
+  volume  = {10},
+  year    = {2016},
 }
 
 @inproceedings{argyriou:12,
-  author = {Argyriou, Andreas and Foygel, Rina and Srebro, Nathan},
-  booktitle = {Advances in Neural Information Processing Systems},
-  editor = {F. Pereira and C.J. Burges and L. Bottou and K.Q. Weinberger},
-  pages = {},
+  author    = {Argyriou, Andreas and Foygel, Rina and Srebro, Nathan},
+  editor    = {Pereira, F. and Burges, C.J. and Bottou, L. and Weinberger, K.Q.},
   publisher = {Curran Associates, Inc.},
-  title = {Sparse Prediction with the k-Support Norm},
-  url = {https://proceedings.neurips.cc/paper/2012/file/99bcfcd754a98ce89cb86f73acc04645-Paper.pdf},
-  volume = {25},
-  year = {2012}
+  url       = {https://proceedings.neurips.cc/paper/2012/file/99bcfcd754a98ce89cb86f73acc04645-Paper.pdf},
+  booktitle = {Advances in Neural Information Processing Systems},
+  title     = {Sparse Prediction with the k-Support Norm},
+  volume    = {25},
+  year      = {2012},
 }
 
 @article{zou:05,
-  author = {Zou, Hui and Hastie, Trevor},
-  title = {Regularization and variable selection via the elastic net},
-  journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
-  volume = {67},
-  number = {2},
-  pages = {301-320},
-  keywords = {Grouping effect, LARS algorithm, Lasso, Penalization, p≫n problem, Variable selection},
-  doi = {https://doi.org/10.1111/j.1467-9868.2005.00503.x},
-  url = {https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/j.1467-9868.2005.00503.x},
-  eprint = {https://rss.onlinelibrary.wiley.com/doi/pdf/10.1111/j.1467-9868.2005.00503.x},
-  year = {2005}
+  author   = {Zou, Hui and Hastie, Trevor},
+  url      = {https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/j.1467-9868.2005.00503.x},
+  doi      = {10.1111/j.1467-9868.2005.00503.x},
+  eprint   = {https://rss.onlinelibrary.wiley.com/doi/pdf/10.1111/j.1467-9868.2005.00503.x},
+  journal  = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
+  keywords = {Grouping effect,LARS algorithm,Lasso,Penalization,p≫n problem,Variable selection},
+  number   = {2},
+  pages    = {301--320},
+  title    = {Regularization and variable selection via the elastic net},
+  volume   = {67},
+  year     = {2005},
 }
 
 @article{jacobs:20,
-  title={A fast approach to optimal transport: The back-and-forth method},
-  author={Jacobs, Matt and L{\'e}ger, Flavien},
-  journal={Numerische Mathematik},
-  volume={146},
-  number={3},
-  pages={513--544},
-  year={2020},
-  publisher={Springer}
+  author    = {Jacobs, Matt and Léger, Flavien},
+  publisher = {Springer},
+  journal   = {Numerische Mathematik},
+  number    = {3},
+  pages     = {513--544},
+  title     = {A fast approach to optimal transport: The back-and-forth method},
+  volume    = {146},
+  year      = {2020},
 }
 
-
 @phdthesis{bertsekas:71,
-  title={Control of uncertain systems with a set-membership description of the uncertainty.},
-  author={Bertsekas, Dimitri P},
-  year={1971},
-  school={Massachusetts Institute of Technology}
+  author = {Bertsekas, Dimitri P},
+  school = {Massachusetts Institute of Technology},
+  title  = {Control of uncertain systems with a set-membership description of the uncertainty.},
+  type   = {phdthesis},
+  year   = {1971},
 }
 
 @book{danskin:67,
-  title={The Theory of Max-Min and its Application to Weapons Allocation Problems},
-  author={Danskin, John M},
-  publisher={Springer},
-  year={1967}
+  author    = {Danskin, John M},
+  publisher = {Springer},
+  title     = {The Theory of Max-Min and its Application to Weapons Allocation Problems},
+  year      = {1967},
+}
+
+@inproceedings{cuturi:17,
+  author    = {Cuturi, Marco and Blondel, Mathieu},
+  editor    = {Precup, Doina and Teh, Yee Whye},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v70/cuturi17a.html},
+  booktitle = {Proceedings of the 34th International Conference on Machine Learning},
+  file      = {http://proceedings.mlr.press/v70/cuturi17a/cuturi17a.pdf},
+  month     = aug,
+  pages     = {894--903},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Soft-{DTW}: a Differentiable Loss Function for Time-Series},
+  volume    = {70},
+  year      = {2017},
+}
+
+@inproceedings{blondel:21,
+  author    = {Blondel, Mathieu and Mensch, Arthur and Vert, Jean-Philippe},
+  editor    = {Banerjee, Arindam and Fukumizu, Kenji},
+  publisher = {PMLR},
+  url       = {https://proceedings.mlr.press/v130/blondel21a.html},
+  booktitle = {Proceedings of The 24th International Conference on Artificial Intelligence and Statistics},
+  file      = {http://proceedings.mlr.press/v130/blondel21a/blondel21a.pdf},
+  month     = apr,
+  pages     = {3853--3861},
+  series    = {Proceedings of Machine Learning Research},
+  title     = {Differentiable Divergences Between Time Series},
+  volume    = {130},
+  year      = {2021},
 }
diff --git a/pyproject.toml b/pyproject.toml
index 27c390ef4..c9660b78b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,7 +71,8 @@ test = [
     "coverage[toml]",
     "chex",
     "networkx>=2.5",
-    "scikit-learn>=1.0"
+    "scikit-learn>=1.0",
+    "tslearn>=0.5; python_version < '3.11'",
 ]
 docs = [
     "sphinx>=4.0",
diff --git a/src/ott/geometry/costs.py b/src/ott/geometry/costs.py
index 5fb31241b..495a4131b 100644
--- a/src/ott/geometry/costs.py
+++ b/src/ott/geometry/costs.py
@@ -19,12 +19,23 @@
 
 import jax
 import jax.numpy as jnp
+import numpy as np
 
 from ott.math import fixed_point_loop, matrix_square_root
+from ott.math import utils as mu
 
 __all__ = [
-    "PNormP", "SqPNorm", "Euclidean", "SqEuclidean", "Cosine", "ElasticL1",
-    "ElasticSTVS", "ElasticSqKOverlap", "Bures", "UnbalancedBures"
+    "PNormP",
+    "SqPNorm",
+    "Euclidean",
+    "SqEuclidean",
+    "Cosine",
+    "ElasticL1",
+    "ElasticSTVS",
+    "ElasticSqKOverlap",
+    "Bures",
+    "UnbalancedBures",
+    "SoftDTW",
 ]
 
 
@@ -728,6 +739,85 @@ def tree_unflatten(cls, aux_data, children):  # noqa: D102
     return cls(dim, sigma=sigma, gamma=gamma, **kwargs)
 
 
+@jax.tree_util.register_pytree_node_class
+class SoftDTW(CostFn):
+  """Soft dynamic time warping (DTW) cost :cite:`cuturi:17`.
+
+  Args:
+    gamma: Smoothing parameter for the soft-min operator.
+    ground_cost: Ground cost function. If ``None``,
+      use :class:`~ott.geometry.costs.SqEuclidean`.
+    debiased: Whether to compute the debiased soft-DTW :cite:`blondel:21`.
+  """
+
+  def __init__(
+      self,
+      gamma: float,
+      ground_cost: Optional[CostFn] = None,
+      debiased: bool = False
+  ):
+    self.gamma = gamma
+    self.ground_cost = SqEuclidean() if ground_cost is None else ground_cost
+    self.debiased = debiased
+
+  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
+    c_xy = self._soft_dtw(x, y)
+    if self.debiased:
+      return c_xy - 0.5 * (self._soft_dtw(x, x) + self._soft_dtw(y, y))
+    return c_xy
+
+  def _soft_dtw(self, t1: jnp.ndarray, t2: jnp.ndarray) -> float:
+    """Compute the (biased) soft-DTW value between ``t1`` and ``t2``."""
+    def body(
+        carry: Tuple[jnp.ndarray, jnp.ndarray],
+        current_antidiagonal: jnp.ndarray
+    ) -> Tuple[Tuple[jnp.ndarray, jnp.ndarray], jnp.ndarray]:
+      # modified from: https://github.com/khdlr/softdtw_jax
+      two_ago, one_ago = carry
+      # candidates fed to the soft-min, taken from the two previous rows
+      diagonal, right, down = two_ago[:-1], one_ago[:-1], one_ago[1:]
+      best = mu.softmin(
+          jnp.stack([diagonal, right, down], axis=-1), self.gamma, axis=-1
+      )
+      # accumulate the cost of the current anti-diagonal
+      next_row = best + current_antidiagonal
+      next_row = jnp.pad(next_row, (1, 0), constant_values=jnp.inf)
+      # slide the window: ``one_ago`` becomes ``two_ago`` in the next step
+      return (one_ago, next_row), next_row
+    # promote 1-D inputs to column vectors of shape (length, 1)
+    t1 = t1[:, None] if t1.ndim == 1 else t1
+    t2 = t2[:, None] if t2.ndim == 1 else t2
+    dist = self.ground_cost.all_pairs(t1, t2)
+    # work with n >= m; transpose the ground-cost matrix otherwise
+    n, m = dist.shape
+    if n < m:
+      dist = dist.T
+      n, m = m, n
+    # scatter ``dist`` into an inf-filled matrix consumed row-by-row by the scan
+    model_matrix = jnp.full((n + m - 1, n), fill_value=jnp.inf)
+    mask = np.tri(n + m - 1, n, k=0, dtype=bool)
+    mask = mask & mask[::-1, ::-1]
+    model_matrix = model_matrix.T.at[mask.T].set(dist.ravel()).T
+    # initial carry built from the first two rows of ``model_matrix``
+    init = (
+        jnp.pad(model_matrix[0], (1, 0), constant_values=jnp.inf),
+        jnp.pad(
+            model_matrix[1] + model_matrix[0, 0], (1, 0),
+            constant_values=jnp.inf
+        )
+    )
+    # the result is the last entry of the final row produced by the scan
+    (_, carry), _ = jax.lax.scan(body, init, model_matrix[2:])
+    return carry[-1]
+
+  def tree_flatten(self):
+    return (self.gamma, self.ground_cost), {"debiased": self.debiased}
+
+  @classmethod
+  def tree_unflatten(cls, aux_data, children):
+    return cls(*children, **aux_data)
+
+
 def x_to_means_and_covs(x: jnp.ndarray,
                         dimension: int) -> Tuple[jnp.ndarray, jnp.ndarray]:
   """Extract means and covariance matrices of Gaussians from raveled vector.
diff --git a/src/ott/math/utils.py b/src/ott/math/utils.py
index 4dc59a0df..2bae4f817 100644
--- a/src/ott/math/utils.py
+++ b/src/ott/math/utils.py
@@ -17,12 +17,13 @@
 import jax
 import jax.experimental.sparse as jesp
 import jax.numpy as jnp
+import jax.scipy as jsp
 
 if TYPE_CHECKING:
   from ott.geometry import costs
 
 __all__ = [
-    "safe_log", "kl", "js", "sparse_scale", "logsumexp",
+    "safe_log", "kl", "js", "sparse_scale", "logsumexp", "softmin",
     "barycentric_projection"
 ]
 
@@ -40,11 +41,13 @@ def safe_log(  # noqa: D103
   return jnp.where(x > 0., jnp.log(x), jnp.log(eps))
 
 
+# TODO(michalk8): add axis argument
 def kl(p: jnp.ndarray, q: jnp.ndarray) -> float:
   """Kullback-Leilbler divergence."""
   return jnp.vdot(p, (safe_log(p) - safe_log(q)))
 
 
+# TODO(michalk8): add axis argument
 def js(p: jnp.ndarray, q: jnp.ndarray, *, c: float = 0.5) -> float:
   """Jensen-Shannon divergence."""
   return c * (kl(p, q) + kl(q, p))
@@ -113,6 +116,23 @@ def logsumexp_jvp(axis, keepdims, return_sign, primals, tangents):
     return lse, res
 
 
+@functools.partial(jax.custom_vjp, nondiff_argnums=(2,))
+def softmin(
+    x: jnp.ndarray, gamma: float, axis: Optional[int] = None
+) -> jnp.ndarray:
+  r"""Soft-min operator: :math:`-\gamma \log \sum_i \exp(-x_i / \gamma)`.
+
+  Args:
+    x: Input data.
+    gamma: Smoothing parameter; the custom VJP returns no gradient for it.
+    axis: Axis or axes over which to operate. If ``None``, use flattened input.
+
+  Returns:
+    The soft minimum.
+  """
+  return -gamma * jsp.special.logsumexp(x / -gamma, axis=axis)
+
+
 @functools.partial(jax.vmap, in_axes=[0, 0, None])
 def barycentric_projection(
     matrix: jnp.ndarray, y: jnp.ndarray, cost_fn: "costs.CostFn"
@@ -128,3 +148,15 @@ def barycentric_projection(
     a vector of shape (n,) containing the barycentric projection of matrix.
   """
   return jax.vmap(cost_fn.barycenter, in_axes=[0, None])(matrix, y)
+
+
+softmin.defvjp(  # custom VJP of softmin w.r.t. x; axis is non-differentiable
+    lambda x, gamma, axis: (softmin(x, gamma, axis), (x / -gamma, axis)),
+    lambda axis, res, g: (
+        jnp.where(
+            jnp.isinf(res[0]), 0.0,  # zero where x / -gamma is infinite
+            jax.nn.softmax(res[0], axis=axis) *
+            (g if axis is None else jnp.expand_dims(g, axis=axis))
+        ), None  # no cotangent is returned for gamma
+    )
+)
diff --git a/tests/geometry/costs_test.py b/tests/geometry/costs_test.py
index 5122da0b1..875b6f0c7 100644
--- a/tests/geometry/costs_test.py
+++ b/tests/geometry/costs_test.py
@@ -23,6 +23,11 @@
 from ott.geometry import costs, pointcloud
 from ott.solvers.linear import sinkhorn
 
+try:
+  from tslearn import metrics as ts_metrics
+except ImportError:
+  ts_metrics = None
+
 
 @pytest.mark.fast
 class TestCostFn:
@@ -170,7 +175,6 @@ def test_stronger_regularization_increases_sparsity(
     x = jax.random.normal(rngs[0], (50, d))
     y = jax.random.normal(rngs[1], (71, d))
     xx = jax.random.normal(rngs[2], (25, d))
-    xx = jax.random.normal(rngs[2], (25, d))
     yy = jax.random.normal(rngs[3], (35, d))
 
     sparsity = {False: [], True: []}
@@ -185,3 +189,78 @@ def test_stronger_regularization_increases_sparsity(
 
     for fwd in [False, True]:
       np.testing.assert_array_equal(np.diff(sparsity[fwd]) > 0.0, True)
+
+
+@pytest.mark.skipif(ts_metrics is None, reason="Not supported for Python 3.11")
+@pytest.mark.fast
+class TestSoftDTW:
+  """Value checks against ``tslearn`` and finite-difference gradient checks."""
+  @pytest.mark.parametrize("n", [11, 16])
+  @pytest.mark.parametrize("m", [13, 16])
+  @pytest.mark.parametrize("gamma", [1e-3, 1.0, 5])
+  def test_soft_dtw(
+      self, rng: jax.random.PRNGKeyArray, n: int, m: int, gamma: float
+  ):
+    rng1, rng2 = jax.random.split(rng, 2)
+    t1 = jax.random.normal(rng1, (n,))
+    t2 = jax.random.normal(rng2, (m,))
+    # tslearn's soft-DTW serves as the reference value
+    expected = ts_metrics.soft_dtw(t1, t2, gamma=gamma)
+    actual = costs.SoftDTW(gamma=gamma)(t1, t2)
+
+    np.testing.assert_allclose(actual, expected, rtol=1e-6, atol=1e-6)
+
+  @pytest.mark.parametrize("debiased,jit", [(False, True), (True, False)])
+  def test_soft_dtw_debiased(
+      self,
+      rng: jax.random.PRNGKeyArray,
+      debiased: bool,
+      jit: bool,
+  ):
+    gamma = 1e-1
+    rng1, rng2 = jax.random.split(rng, 2)
+    t1 = jax.random.normal(rng1, (16,))
+    t2 = jax.random.normal(rng2, (32,))
+    # reference value, debiased by hand with the two self-comparison terms
+    expected = ts_metrics.soft_dtw(t1, t2, gamma=gamma)
+    if debiased:
+      expected -= 0.5 * (
+          ts_metrics.soft_dtw(t1, t1, gamma=gamma) +
+          ts_metrics.soft_dtw(t2, t2, gamma=gamma)
+      )
+    cost_fn = costs.SoftDTW(gamma=gamma, debiased=debiased)
+    actual = jax.jit(cost_fn)(t1, t2) if jit else cost_fn(t1, t2)
+
+    np.testing.assert_allclose(actual, expected, rtol=1e-6, atol=1e-6)
+    if debiased:
+      assert expected >= 0
+      np.testing.assert_allclose(cost_fn(t1, t1), 0.0, rtol=1e-6, atol=1e-6)
+      np.testing.assert_allclose(cost_fn(t2, t2), 0.0, rtol=1e-6, atol=1e-6)
+
+  @pytest.mark.parametrize("debiased,jit", [(False, False), (True, True)])
+  @pytest.mark.parametrize("gamma", [1e-2, 1])
+  def test_soft_dtw_grad(
+      self, rng: jax.random.PRNGKeyArray, debiased: bool, jit: bool,
+      gamma: float
+  ):
+    rngs = jax.random.split(rng, 4)
+    eps, tol = 1e-3, 1e-5
+    t1 = jax.random.normal(rngs[0], (9,))
+    t2 = jax.random.normal(rngs[1], (16,))
+    # random perturbation directions, rescaled to have norm eps
+    v_t1 = jax.random.normal(rngs[2], shape=t1.shape)
+    v_t1 = (v_t1 / jnp.linalg.norm(v_t1, axis=-1, keepdims=True)) * eps
+    v_t2 = jax.random.normal(rngs[3], shape=t2.shape)
+    v_t2 = (v_t2 / jnp.linalg.norm(v_t2, axis=-1, keepdims=True)) * eps
+
+    cost_fn = costs.SoftDTW(gamma=gamma, debiased=debiased)
+    grad_cost = jax.grad(cost_fn, argnums=[0, 1])
+    grad_t1, grad_t2 = jax.jit(grad_cost)(t1, t2) if jit else grad_cost(t1, t2)
+    # central difference f(t + v) - f(t - v) should be close to 2 <v, grad>
+    expected = cost_fn(t1 + v_t1, t2) - cost_fn(t1 - v_t1, t2)
+    actual = 2 * jnp.vdot(v_t1, grad_t1)
+    np.testing.assert_allclose(actual, expected, rtol=tol, atol=tol)
+    # same check for the second argument
+    expected = cost_fn(t1, t2 + v_t2) - cost_fn(t1, t2 - v_t2)
+    actual = 2 * jnp.vdot(v_t2, grad_t2)
+    np.testing.assert_allclose(actual, expected, rtol=tol, atol=tol)
diff --git a/tests/geometry/graph_test.py b/tests/geometry/graph_test.py
index 1370882ed..2ea0f653c 100644
--- a/tests/geometry/graph_test.py
+++ b/tests/geometry/graph_test.py
@@ -18,7 +18,7 @@
 from ott.solvers.linear import sinkhorn
 
 # we mix both dense/sparse tests
-sksparse = pytest.importorskip("sksparse")
+_ = pytest.importorskip("sksparse", reason="Not supported for Python 3.11")
 
 
 def random_graph(