From c8745a44afc1d79664fc56fcc7931f09a6fa5e84 Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@nyu.edu>
Date: Mon, 15 Jun 2020 16:53:46 -0400
Subject: [PATCH 01/11] Add basic ecdfplot implementation

---
 seaborn/_statistics.py   |  36 +++++++++++++
 seaborn/distributions.py | 107 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 142 insertions(+), 1 deletion(-)

diff --git a/seaborn/_statistics.py b/seaborn/_statistics.py
index 951795c100..8ab07501cb 100644
--- a/seaborn/_statistics.py
+++ b/seaborn/_statistics.py
@@ -345,3 +345,39 @@ def __call__(self, x1, x2=None, weights=None):
             return self._eval_univariate(x1, weights)
         else:
             return self._eval_bivariate(x1, x2, weights)
+
+
+class ECDF:
+
+    def __init__(self, stat="proportion"):
+
+        _check_argument("stat", ["count", "proportion"], stat)
+        self.stat = stat
+
+    # Do we need bivariate ECDF?
+
+    def _eval_univariate(self, x, weights):
+
+        sorter = np.argsort(x)
+        x = x[sorter]
+        weights = weights[sorter]
+
+        y = weights.cumsum()
+
+        if self.stat == "proportion":
+            y = y / y.max()
+
+        x = np.r_[-np.inf, x]
+        y = np.r_[0, y]
+
+        return y, x
+
+    def __call__(self, x1, weights=None):
+
+        x1 = np.asarray(x1)
+        if weights is None:
+            weights = np.ones_like(x1)
+        else:
+            weights = np.asarray(weights)
+
+        return self._eval_univariate(x1, weights)
diff --git a/seaborn/distributions.py b/seaborn/distributions.py
index c26f583745..f56686fb21 100644
--- a/seaborn/distributions.py
+++ b/seaborn/distributions.py
@@ -18,6 +18,7 @@
 from ._statistics import (
     KDE,
     Histogram,
+    ECDF,
 )
 from .utils import (
     remove_na,
@@ -33,7 +34,7 @@
 )
 
 
-__all__ = ["distplot", "histplot", "kdeplot", "rugplot"]
+__all__ = ["distplot", "histplot", "kdeplot", "ecdfplot", "rugplot"]
 
 # ==================================================================================== #
 # Module documentation
@@ -1128,6 +1129,59 @@ def plot_bivariate_density(
                 ax, artist, fill, False, "layer", 1, artist_kws, {},
             )
 
+    def plot_univariate_ecdf(self, estimate_kws, legend, plot_kws, ax):
+
+        # TODO see notes elsewhere about GH2135
+        cols = list(self.variables)
+
+        # TODO maybe have an option for joint start/end?
+        estimator = ECDF(**estimate_kws)
+
+        # Loop through the subsets, transform and plot the data
+        for sub_vars, sub_data in self._semantic_subsets(
+            "hue", reverse=True, from_comp_data=True,
+        ):
+
+            # "Compute" the ECDF
+            sub_data = sub_data[cols].dropna()
+            observations = sub_data[self.data_variable]
+            weights = sub_data.get("weights", None)
+            stat, vals = estimator(observations, weights)
+
+            # Assign attributes based on semantic mapping
+            artist_kws = plot_kws.copy()
+            if "hue" in self.variables:
+                artist_kws["color"] = self._hue_map(sub_vars["hue"])
+
+            # Work out the orientation of the plot
+            if self.data_variable == "x":
+                plot_args = vals, stat
+                stat_variable = "y"
+            else:
+                plot_args = stat, vals
+                stat_variable = "x"
+
+            # Draw the line for this subset
+            artist, = ax.plot(*plot_args, drawstyle="steps-post", **artist_kws)
+            sticky_edges = getattr(artist.sticky_edges, stat_variable)
+            sticky_edges[:] = 0, 1
+
+        # --- Finalize the plot ----
+        stat = estimator.stat.capitalize()
+        default_x = default_y = ""
+        if self.data_variable == "x":
+            default_y = stat
+        if self.data_variable == "y":
+            default_x = stat
+        self._add_axis_labels(ax, default_x, default_y)
+
+        if "hue" in self.variables and legend:
+            artist = partial(mpl.lines.Line2D, [], [])
+            alpha = plot_kws.get("alpha", 1)
+            self._add_legend(
+                ax, artist, False, False, None, alpha, plot_kws, {},
+            )
+
     def plot_rug(self, height, expand_margins, legend, ax, kws):
 
         kws = _normalize_kwargs(kws, mpl.lines.Line2D)
@@ -1770,6 +1824,57 @@ def kdeplot(
 )
 
 
+def ecdfplot(
+    data=None, *,
+    # Vector variables
+    x=None, y=None, hue=None, weights=None,
+    # Computation parameters
+    stat="proportion",
+    # Hue mapping parameters
+    palette=None, hue_order=None, hue_norm=None, color=None,
+    # Axes information
+    log_scale=None, legend=True, ax=None,
+    # Other appearance keywords
+    **kwargs,
+):
+
+    p = _DistributionPlotter(
+        data=data,
+        variables=_DistributionPlotter.get_semantics(locals())
+    )
+
+    p.map_hue(palette=palette, order=hue_order, norm=hue_norm)
+
+    # We could support other semantics (size, style) here fairly easily
+    # But it would make distplot a bit more complicated.
+    # It's always possible to add features like that later, so I am going to defer.
+    # It will be even easier to wait until after there is a more general/abstract
+    # way to go from semantic specs to artist attributes.
+
+    if ax is None:
+        ax = plt.gca()
+
+    # We could add this one day, but it's of dubious value
+    if not p.univariate:
+        raise NotImplementedError("Bivariate ECDF plots are not implemented")
+
+    # Attach the axes to the plotter, setting up unit conversions
+    p._attach(ax, log_scale=log_scale)
+
+    estimate_kws = dict(
+        stat=stat,
+    )
+
+    p.plot_univariate_ecdf(
+        estimate_kws=estimate_kws,
+        legend=legend,
+        plot_kws=kwargs,
+        ax=ax,
+    )
+
+    return ax
+
+
 @_deprecate_positional_args
 def rugplot(
     x=None,  # Allow positional x, because behavior won't change

From 96744c27fe614d14139f02085c2d879bff333ad1 Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@nyu.edu>
Date: Mon, 15 Jun 2020 17:03:28 -0400
Subject: [PATCH 02/11] Allow user to override drawstyle

---
 seaborn/distributions.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/seaborn/distributions.py b/seaborn/distributions.py
index f56686fb21..c3a16c2c98 100644
--- a/seaborn/distributions.py
+++ b/seaborn/distributions.py
@@ -1137,6 +1137,9 @@ def plot_univariate_ecdf(self, estimate_kws, legend, plot_kws, ax):
         # TODO maybe have an option for joint start/end?
         estimator = ECDF(**estimate_kws)
 
+        # Allow other drawstyles (I'm not sure why you'd want them)
+        plot_kws.setdefault("drawstyle", "steps-post")
+
         # Loop through the subsets, transform and plot the data
         for sub_vars, sub_data in self._semantic_subsets(
             "hue", reverse=True, from_comp_data=True,
@@ -1162,7 +1165,7 @@ def plot_univariate_ecdf(self, estimate_kws, legend, plot_kws, ax):
                 stat_variable = "x"
 
             # Draw the line for this subset
-            artist, = ax.plot(*plot_args, drawstyle="steps-post", **artist_kws)
+            artist, = ax.plot(*plot_args, **artist_kws)
             sticky_edges = getattr(artist.sticky_edges, stat_variable)
             sticky_edges[:] = 0, 1
 

From d2d3dca8917717b072b6a1f7734829f8307d0ce1 Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@nyu.edu>
Date: Mon, 15 Jun 2020 20:53:15 -0400
Subject: [PATCH 03/11] Add unit tests

---
 seaborn/_statistics.py              |  6 +--
 seaborn/distributions.py            |  7 ++-
 seaborn/tests/test_distributions.py | 73 ++++++++++++++++++++++++-
 seaborn/tests/test_statistics.py    | 84 +++++++++++++++++++++++++----
 4 files changed, 155 insertions(+), 15 deletions(-)

diff --git a/seaborn/_statistics.py b/seaborn/_statistics.py
index 8ab07501cb..2e31831dec 100644
--- a/seaborn/_statistics.py
+++ b/seaborn/_statistics.py
@@ -357,8 +357,8 @@ def __init__(self, stat="proportion"):
     # Do we need bivariate ECDF?
 
     def _eval_univariate(self, x, weights):
-
-        sorter = np.argsort(x)
+        """Inner function for ECDF of one variable."""
+        sorter = x.argsort()
         x = x[sorter]
         weights = weights[sorter]
 
@@ -373,7 +373,7 @@ def _eval_univariate(self, x, weights):
         return y, x
 
     def __call__(self, x1, weights=None):
-
+        """Return proportion or count of observations below each sorted datapoint."""
         x1 = np.asarray(x1)
         if weights is None:
             weights = np.ones_like(x1)
diff --git a/seaborn/distributions.py b/seaborn/distributions.py
index c3a16c2c98..141c360b33 100644
--- a/seaborn/distributions.py
+++ b/seaborn/distributions.py
@@ -1164,10 +1164,15 @@ def plot_univariate_ecdf(self, estimate_kws, legend, plot_kws, ax):
                 plot_args = stat, vals
                 stat_variable = "x"
 
+            if estimator.stat == "count":
+                top_edge = len(observations)
+            else:
+                top_edge = 1
+
             # Draw the line for this subset
             artist, = ax.plot(*plot_args, **artist_kws)
             sticky_edges = getattr(artist.sticky_edges, stat_variable)
-            sticky_edges[:] = 0, 1
+            sticky_edges[:] = 0, top_edge
 
         # --- Finalize the plot ----
         stat = estimator.stat.capitalize()
diff --git a/seaborn/tests/test_distributions.py b/seaborn/tests/test_distributions.py
index 2ea44faa23..60bebb42ed 100644
--- a/seaborn/tests/test_distributions.py
+++ b/seaborn/tests/test_distributions.py
@@ -26,8 +26,9 @@
 from ..distributions import (
     _DistributionPlotter,
     histplot,
-    rugplot,
+    ecdfplot,
     kdeplot,
+    rugplot,
 )
 
 
@@ -1793,3 +1794,73 @@ def test_colorbar(self, long_df):
         f, (ax, cax) = plt.subplots(2)
         histplot(long_df, x="x", y="y", cbar=True, cbar_ax=cax, ax=ax)
         assert len(ax.figure.axes) == 2
+
+
+class TestECDFPlotUnivariate:
+
+    @pytest.mark.parametrize("variable", ["x", "y"])
+    def test_long_vectors(self, long_df, variable):
+
+        vector = long_df[variable]
+        vectors = [
+            variable, vector, np.asarray(vector), vector.tolist(),
+        ]
+
+        f, ax = plt.subplots()
+        for vector in vectors:
+            ecdfplot(data=long_df, ax=ax, **{variable: vector})
+
+        xdata = [l.get_xdata() for l in ax.lines]
+        for a, b in itertools.product(xdata, xdata):
+            assert_array_equal(a, b)
+
+        ydata = [l.get_ydata() for l in ax.lines]
+        for a, b in itertools.product(ydata, ydata):
+            assert_array_equal(a, b)
+
+    def test_hue(self, long_df):
+
+        ax = ecdfplot(long_df, x="x", hue="a")
+
+        for line, color in zip(ax.lines[::-1], color_palette()):
+            assert line.get_color() == color
+
+    def test_line_kwargs(self, long_df):
+
+        ls = "--"
+        lw = 3
+        ax = ecdfplot(long_df, x="x", hue="a", ls=ls, lw=lw)
+
+        for line in ax.lines:
+            assert line.get_linestyle() == ls
+            assert line.get_linewidth() == lw
+
+    @pytest.mark.parametrize(
+        "data_var,stat_var", [["x", "y"], ["y", "x"]],
+    )
+    def test_proportion_limits(self, flat_series, data_var, stat_var):
+
+        ax = ecdfplot(**{data_var: flat_series})
+        data = getattr(ax.lines[0], f"get_{stat_var}data")()
+        assert data[0] == 0
+        assert data[-1] == 1
+        sticky_edges = getattr(ax.lines[0].sticky_edges, stat_var)
+        assert sticky_edges[:] == [0, 1]
+
+    @pytest.mark.parametrize(
+        "data_var,stat_var", [["x", "y"], ["y", "x"]],
+    )
+    def test_proportion_count(self, flat_series, data_var, stat_var):
+
+        n = len(flat_series)
+        ax = ecdfplot(**{data_var: flat_series}, stat="count")
+        data = getattr(ax.lines[0], f"get_{stat_var}data")()
+        assert data[0] == 0
+        assert data[-1] == n
+        sticky_edges = getattr(ax.lines[0].sticky_edges, stat_var)
+        assert sticky_edges[:] == [0, n]
+
+    def test_bivariate_error(self, long_df):
+
+        with pytest.raises(NotImplementedError, match="Bivariate ECDF plots"):
+            ecdfplot(data=long_df, x="x", y="y")
diff --git a/seaborn/tests/test_statistics.py b/seaborn/tests/test_statistics.py
index 87a63169cf..4d246371ee 100644
--- a/seaborn/tests/test_statistics.py
+++ b/seaborn/tests/test_statistics.py
@@ -1,15 +1,36 @@
 import numpy as np
 from scipy import integrate
 
+try:
+    import statsmodels.distributions as smdist
+except ImportError:
+    smdist = None
+
 import pytest
-from numpy.testing import assert_array_equal
+from numpy.testing import assert_array_equal, assert_array_almost_equal
 
 from .._statistics import (
     KDE,
     Histogram,
+    ECDF,
 )
 
 
+class DistributionFixtures:
+
+    @pytest.fixture
+    def x(self, rng):
+        return rng.normal(0, 1, 100)
+
+    @pytest.fixture
+    def y(self, rng):
+        return rng.normal(0, 5, 100)
+
+    @pytest.fixture
+    def weights(self, rng):
+        return rng.uniform(0, 5, 100)
+
+
 class TestKDE:
 
     def test_gridsize(self, rng):
@@ -127,15 +148,7 @@ def test_bivariate_cumulative(self, rng):
         assert density[-1, -1] == pytest.approx(1, abs=1e-2)
 
 
-class TestHistogram:
-
-    @pytest.fixture
-    def x(self, rng):
-        return rng.normal(0, 1, 100)
-
-    @pytest.fixture
-    def y(self, rng):
-        return rng.normal(0, 5, 100)
+class TestHistogram(DistributionFixtures):
 
     def test_string_bins(self, x):
 
@@ -379,3 +392,54 @@ def test_bad_stat(self):
 
         with pytest.raises(ValueError):
             Histogram(stat="invalid")
+
+
+class TestECDF(DistributionFixtures):
+
+    def test_univariate_proportion(self, x):
+
+        ecdf = ECDF()
+        stat, vals = ecdf(x)
+        assert_array_equal(vals[1:], np.sort(x))
+        assert_array_almost_equal(stat[1:], np.linspace(0, 1, len(x) + 1)[1:])
+        assert stat[0] == 0
+
+    def test_univariate_count(self, x):
+
+        ecdf = ECDF(stat="count")
+        stat, vals = ecdf(x)
+
+        assert_array_equal(vals[1:], np.sort(x))
+        assert_array_almost_equal(stat[1:], np.arange(len(x)) + 1)
+        assert stat[0] == 0
+
+    def test_univariate_proportion_weights(self, x, weights):
+
+        ecdf = ECDF()
+        stat, vals = ecdf(x, weights=weights)
+        assert_array_equal(vals[1:], np.sort(x))
+        assert_array_almost_equal(stat[1:], weights[x.argsort()].cumsum() / weights.sum())
+        assert stat[0] == 0
+
+    def test_univariate_count_weights(self, x, weights):
+
+        ecdf = ECDF(stat="count")
+        stat, vals = ecdf(x, weights=weights)
+        assert_array_equal(vals[1:], np.sort(x))
+        assert_array_almost_equal(stat[1:], weights[x.argsort()].cumsum())
+        assert stat[0] == 0
+
+    @pytest.mark.skipif(smdist is None, reason="Requires statsmodels")
+    def test_against_statsmodels(self, x):
+
+        ecdf = ECDF()
+        stat, vals = ecdf(x)
+
+        sm_ecdf = smdist.empirical_distribution.ECDF(x)
+        assert_array_equal(vals, sm_ecdf.x)
+        assert_array_almost_equal(stat, sm_ecdf.y)
+
+    def test_invalid_stat(self, x):
+
+        with pytest.raises(ValueError, match="`stat` must be one of"):
+            ECDF(stat="density")
\ No newline at end of file

From a3250efdaa746476369b7a46a97b95e1679e19f3 Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@nyu.edu>
Date: Mon, 15 Jun 2020 21:14:33 -0400
Subject: [PATCH 04/11] Add docstring content

---
 doc/api.rst                         |  1 +
 seaborn/_docstrings.py              |  3 ++
 seaborn/distributions.py            | 57 ++++++++++++++++++++++++++++-
 seaborn/tests/test_distributions.py |  4 +-
 seaborn/tests/test_statistics.py    |  5 ++-
 5 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/doc/api.rst b/doc/api.rst
index 81cde3ea95..563559c556 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -45,6 +45,7 @@ Distribution plots
 
     distplot
     histplot
+    ecdfplot
     kdeplot
     rugplot
 
diff --git a/seaborn/_docstrings.py b/seaborn/_docstrings.py
index 010dcdb8e5..a00bb79b04 100644
--- a/seaborn/_docstrings.py
+++ b/seaborn/_docstrings.py
@@ -116,6 +116,9 @@ def from_function_params(cls, func):
     """,
     kdeplot="""
 kdeplot : Plot univariate or bivariate distributions using kernel density estimation.
+    """,
+    ecdfplot="""
+ecdfplot : Plot empirical cumulative distribution functions.
     """,
     rugplot="""
 rugplot : Plot a tick at each observation value along the x and/or y axes.
diff --git a/seaborn/distributions.py b/seaborn/distributions.py
index 141c360b33..68e80a30d9 100644
--- a/seaborn/distributions.py
+++ b/seaborn/distributions.py
@@ -1475,6 +1475,7 @@ def histplot(
 --------
 {seealso.kdeplot}
 {seealso.rugplot}
+{seealso.ecdfplot}
 {seealso.jointplot}
 distplot
 
@@ -1781,9 +1782,10 @@ def kdeplot(
 
 See Also
 --------
+{seealso.violinplot}
 {seealso.histplot}
+{seealso.ecdfplot}
 {seealso.rugplot}
-{seealso.violinplot}
 {seealso.jointplot}
 distplot
 
@@ -1839,7 +1841,7 @@ def ecdfplot(
     # Computation parameters
     stat="proportion",
     # Hue mapping parameters
-    palette=None, hue_order=None, hue_norm=None, color=None,
+    palette=None, hue_order=None, hue_norm=None,
     # Axes information
     log_scale=None, legend=True, ax=None,
     # Other appearance keywords
@@ -1883,6 +1885,57 @@ def ecdfplot(
     return ax
 
 
+ecdfplot.__doc__ = """\
+Plot empirical cumulative distribution functions.
+
+An ECDF represents the proportion or count of observations falling below each
+unique value in a dataset. Compared to a histogram or density plot, it has the
+advantage that each observation is visualized directly, meaning that there are
+no binning or smoothing parameters that need to be adjusted. It also aids direct
+comparisons between multiple distributions. A downside is that the relationship
+between the appearance of the plot and the basic properties of the distribution
+(such as its central tendency, variance, and the presence of any bimodality)
+may not be as intuitive.
+
+More information is provided in the :ref:`user guide <userguide_ecdf>`.
+
+Parameters
+----------
+{params.core.data}
+{params.core.xy}
+{params.core.hue}
+weights : vector or key in ``data``
+    If provided, weight the contribution of the corresponding data points
+    towards the distribution by these factors.
+stat : {{"proportion", "count"}}
+    Distribution statistic to compute.
+{params.core.palette}
+{params.core.hue_order}
+{params.core.hue_norm}
+{params.dist.log_scale}
+{params.dist.legend}
+{params.core.ax}
+kwargs
+    Other keyword arguments are passed to :meth:`matplotlib.axes.Axes.plot`.
+
+Returns
+-------
+{returns.ax}
+
+See Also
+--------
+{seealso.histplot}
+{seealso.kdeplot}
+{seealso.rugplot}
+distplot
+
+""".format(
+    params=_param_docs,
+    returns=_core_docs["returns"],
+    seealso=_core_docs["seealso"],
+)
+
+
 @_deprecate_positional_args
 def rugplot(
     x=None,  # Allow positional x, because behavior won't change
diff --git a/seaborn/tests/test_distributions.py b/seaborn/tests/test_distributions.py
index 60bebb42ed..bac5f30149 100644
--- a/seaborn/tests/test_distributions.py
+++ b/seaborn/tests/test_distributions.py
@@ -1827,11 +1827,13 @@ def test_hue(self, long_df):
 
     def test_line_kwargs(self, long_df):
 
+        color = "r"
         ls = "--"
         lw = 3
-        ax = ecdfplot(long_df, x="x", hue="a", ls=ls, lw=lw)
+        ax = ecdfplot(long_df, x="x", color=color, ls=ls, lw=lw)
 
         for line in ax.lines:
+            assert line.get_color() == to_rgb(color)
             assert line.get_linestyle() == ls
             assert line.get_linewidth() == lw
 
diff --git a/seaborn/tests/test_statistics.py b/seaborn/tests/test_statistics.py
index 4d246371ee..e51f77dcc7 100644
--- a/seaborn/tests/test_statistics.py
+++ b/seaborn/tests/test_statistics.py
@@ -418,7 +418,8 @@ def test_univariate_proportion_weights(self, x, weights):
         ecdf = ECDF()
         stat, vals = ecdf(x, weights=weights)
         assert_array_equal(vals[1:], np.sort(x))
-        assert_array_almost_equal(stat[1:], weights[x.argsort()].cumsum() / weights.sum())
+        expected_stats = weights[x.argsort()].cumsum() / weights.sum()
+        assert_array_almost_equal(stat[1:], expected_stats)
         assert stat[0] == 0
 
     def test_univariate_count_weights(self, x, weights):
@@ -442,4 +443,4 @@ def test_against_statsmodels(self, x):
     def test_invalid_stat(self, x):
 
         with pytest.raises(ValueError, match="`stat` must be one of"):
-            ECDF(stat="density")
\ No newline at end of file
+            ECDF(stat="density")

From a211515b4b9b1e55391563824fe67b32389a06bf Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@nyu.edu>
Date: Mon, 15 Jun 2020 21:52:14 -0400
Subject: [PATCH 05/11] Add more docstring information and fix test

---
 seaborn/_statistics.py              | 36 ++++++++++++++++++++++++++++-
 seaborn/distributions.py            |  7 ++----
 seaborn/tests/test_distributions.py |  2 +-
 3 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/seaborn/_statistics.py b/seaborn/_statistics.py
index 2e31831dec..bccd088eec 100644
--- a/seaborn/_statistics.py
+++ b/seaborn/_statistics.py
@@ -1,3 +1,29 @@
+"""Statistical transformations for visualization.
+
+This module is currently private, but is being written to eventually form part
+of the public API.
+
+The classes should behave roughly in the style of scikit-learn.
+
+- All data-independent parameters should be passed to the class constructor.
+- Each class should implement a default transformation that is exposed through
+  __call__. These are currently written for vector arguments, but I think
+  consuming a whole `plot_data` DataFrame and returning it with transformed
+  variables would make more sense.
+- Some classes have data-dependent preprocessing that should be cached and used
+  multiple times (think defining histogram bins off all data and then counting
+  observations within each bin multiple times per data subsets). These currently
+  have unique names, but it would be good to have a common name. Not quite
+  `fit`, but something similar.
+- Alternatively, the transform interface could take some information about grouping
+  variables and do a groupby internally.
+- Some classes should define alternate transforms that might make the most sense
+  with a different function. For example, KDE usually evaluates the distribution
+  on a regular grid, but it would be useful for it to transform at the actual
+  datapoints. Then again, this could be controlled by a parameter at the time of
+  class instantiation.
+
+"""
 from distutils.version import LooseVersion
 from numbers import Number
 import numpy as np
@@ -348,9 +374,17 @@ def __call__(self, x1, x2=None, weights=None):
 
 
 class ECDF:
-
+    """Univariate empirical cumulative distribution estimator."""
     def __init__(self, stat="proportion"):
+        """Initialize the class with its paramters
 
+        Parameters
+        ----------
+        stat : {{"proportion", "count"}}
+            Distribution statistic to compute.
+
+        """
+        # TODO add remove_duplicates
         _check_argument("stat", ["count", "proportion"], stat)
         self.stat = stat
 
diff --git a/seaborn/distributions.py b/seaborn/distributions.py
index 68e80a30d9..58f7615e5b 100644
--- a/seaborn/distributions.py
+++ b/seaborn/distributions.py
@@ -76,6 +76,7 @@
     dist=DocstringComponents(_dist_params),
     kde=DocstringComponents.from_function_params(KDE.__init__),
     hist=DocstringComponents.from_function_params(Histogram.__init__),
+    ecdf=DocstringComponents.from_function_params(ECDF.__init__),
 )
 
 
@@ -1904,11 +1905,7 @@ def ecdfplot(
 {params.core.data}
 {params.core.xy}
 {params.core.hue}
-weights : vector or key in ``data``
-    If provided, weight the contribution of the corresponding data points
-    towards the distribution by these factors.
-stat : {{"proportion", "count"}}
-    Distribution statistic to compute.
+{params.ecdf.stat}
 {params.core.palette}
 {params.core.hue_order}
 {params.core.hue_norm}
diff --git a/seaborn/tests/test_distributions.py b/seaborn/tests/test_distributions.py
index bac5f30149..98f26d63d0 100644
--- a/seaborn/tests/test_distributions.py
+++ b/seaborn/tests/test_distributions.py
@@ -1833,7 +1833,7 @@ def test_line_kwargs(self, long_df):
         ax = ecdfplot(long_df, x="x", color=color, ls=ls, lw=lw)
 
         for line in ax.lines:
-            assert line.get_color() == to_rgb(color)
+            assert to_rgb(line.get_color()) == to_rgb(color)
             assert line.get_linestyle() == ls
             assert line.get_linewidth() == lw
 

From eef3f4b562808b37935133ff464ff231fd31fafa Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@nyu.edu>
Date: Tue, 16 Jun 2020 09:25:48 -0400
Subject: [PATCH 06/11] Add complementary ECDF

---
 seaborn/_statistics.py              | 10 +++++++---
 seaborn/distributions.py            |  4 +++-
 seaborn/tests/test_distributions.py | 12 ++++++++++++
 seaborn/tests/test_statistics.py    |  9 +++++++--
 4 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/seaborn/_statistics.py b/seaborn/_statistics.py
index bccd088eec..f0c8f73a37 100644
--- a/seaborn/_statistics.py
+++ b/seaborn/_statistics.py
@@ -375,18 +375,20 @@ def __call__(self, x1, x2=None, weights=None):
 
 class ECDF:
     """Univariate empirical cumulative distribution estimator."""
-    def __init__(self, stat="proportion"):
+    def __init__(self, stat="proportion", complementary=False):
         """Initialize the class with its paramters
 
         Parameters
         ----------
         stat : {{"proportion", "count"}}
             Distribution statistic to compute.
+        complementary : bool
+            If True, use the complementary CDF (1 - CDF)
 
         """
-        # TODO add remove_duplicates
         _check_argument("stat", ["count", "proportion"], stat)
         self.stat = stat
+        self.complementary = complementary
 
     # Do we need bivariate ECDF?
 
@@ -395,7 +397,6 @@ def _eval_univariate(self, x, weights):
         sorter = x.argsort()
         x = x[sorter]
         weights = weights[sorter]
-
         y = weights.cumsum()
 
         if self.stat == "proportion":
@@ -404,6 +405,9 @@ def _eval_univariate(self, x, weights):
         x = np.r_[-np.inf, x]
         y = np.r_[0, y]
 
+        if self.complementary:
+            y = y.max() - y
+
         return y, x
 
     def __call__(self, x1, weights=None):
diff --git a/seaborn/distributions.py b/seaborn/distributions.py
index 58f7615e5b..4451d83ceb 100644
--- a/seaborn/distributions.py
+++ b/seaborn/distributions.py
@@ -1840,7 +1840,7 @@ def ecdfplot(
     # Vector variables
     x=None, y=None, hue=None, weights=None,
     # Computation parameters
-    stat="proportion",
+    stat="proportion", complementary=False,
     # Hue mapping parameters
     palette=None, hue_order=None, hue_norm=None,
     # Axes information
@@ -1874,6 +1874,7 @@ def ecdfplot(
 
     estimate_kws = dict(
         stat=stat,
+        complementary=complementary,
     )
 
     p.plot_univariate_ecdf(
@@ -1906,6 +1907,7 @@ def ecdfplot(
 {params.core.xy}
 {params.core.hue}
 {params.ecdf.stat}
+{params.ecdf.complementary}
 {params.core.palette}
 {params.core.hue_order}
 {params.core.hue_norm}
diff --git a/seaborn/tests/test_distributions.py b/seaborn/tests/test_distributions.py
index 98f26d63d0..fec93dc1d0 100644
--- a/seaborn/tests/test_distributions.py
+++ b/seaborn/tests/test_distributions.py
@@ -1849,6 +1849,18 @@ def test_proportion_limits(self, flat_series, data_var, stat_var):
         sticky_edges = getattr(ax.lines[0].sticky_edges, stat_var)
         assert sticky_edges[:] == [0, 1]
 
+    @pytest.mark.parametrize(
+        "data_var,stat_var", [["x", "y"], ["y", "x"]],
+    )
+    def test_proportion_limits_complementary(self, flat_series, data_var, stat_var):
+
+        ax = ecdfplot(**{data_var: flat_series}, complementary=True)
+        data = getattr(ax.lines[0], f"get_{stat_var}data")()
+        assert data[0] == 1
+        assert data[-1] == 0
+        sticky_edges = getattr(ax.lines[0].sticky_edges, stat_var)
+        assert sticky_edges[:] == [0, 1]
+
     @pytest.mark.parametrize(
         "data_var,stat_var", [["x", "y"], ["y", "x"]],
     )
diff --git a/seaborn/tests/test_statistics.py b/seaborn/tests/test_statistics.py
index e51f77dcc7..b6661ce7b3 100644
--- a/seaborn/tests/test_statistics.py
+++ b/seaborn/tests/test_statistics.py
@@ -433,13 +433,18 @@ def test_univariate_count_weights(self, x, weights):
     @pytest.mark.skipif(smdist is None, reason="Requires statsmodels")
     def test_against_statsmodels(self, x):
 
+        sm_ecdf = smdist.empirical_distribution.ECDF(x)
+
         ecdf = ECDF()
         stat, vals = ecdf(x)
-
-        sm_ecdf = smdist.empirical_distribution.ECDF(x)
         assert_array_equal(vals, sm_ecdf.x)
         assert_array_almost_equal(stat, sm_ecdf.y)
 
+        ecdf = ECDF(complementary=True)
+        stat, vals = ecdf(x)
+        assert_array_equal(vals, sm_ecdf.x)
+        assert_array_almost_equal(stat, sm_ecdf.y[::-1])
+
     def test_invalid_stat(self, x):
 
         with pytest.raises(ValueError, match="`stat` must be one of"):

From aca54c8a7f4aa0f8ac3fdcbea157e286ea0dec53 Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@nyu.edu>
Date: Tue, 16 Jun 2020 10:01:24 -0400
Subject: [PATCH 07/11] Add ecdfplot API examples

---
 doc/docstrings/ecdfplot.ipynb | 130 ++++++++++++++++++++++++++++++++++
 doc/docstrings/histplot.ipynb |   2 +-
 seaborn/distributions.py      |  10 ++-
 3 files changed, 140 insertions(+), 2 deletions(-)
 create mode 100644 doc/docstrings/ecdfplot.ipynb

diff --git a/doc/docstrings/ecdfplot.ipynb b/doc/docstrings/ecdfplot.ipynb
new file mode 100644
index 0000000000..ef41c7c8c9
--- /dev/null
+++ b/doc/docstrings/ecdfplot.ipynb
@@ -0,0 +1,130 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Plot a univariate distribution along the x axis:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import seaborn as sns; sns.set()\n",
+    "penguins = sns.load_dataset(\"penguins\")\n",
+    "sns.ecdfplot(data=penguins, x=\"flipper_length_mm\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Flip the plot by assigning the data variable to the y axis:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.ecdfplot(data=penguins, y=\"flipper_length_mm\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If neither `x` nor `y` is assigned, the dataset is treated as wide-form, and an ECDF is drawn for each numeric column:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.ecdfplot(data=penguins.filter(like=\"culmen_\", axis=\"columns\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can also draw multiple ECDFs from a long-form dataset with hue mapping:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.ecdfplot(data=penguins, x=\"culmen_length_mm\", hue=\"species\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The default distribution statistic is normalized to show a proportion, but you can show absolute counts instead:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.ecdfplot(data=penguins, x=\"culmen_length_mm\", hue=\"species\", stat=\"count\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "It's also possible to plot the empirical complementary CDF (1 - CDF):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.ecdfplot(data=penguins, x=\"culmen_length_mm\", hue=\"species\", complementary=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "seaborn-refactor (py38)",
+   "language": "python",
+   "name": "seaborn-refactor"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/doc/docstrings/histplot.ipynb b/doc/docstrings/histplot.ipynb
index 2efde821ef..8e8fc5f7e1 100644
--- a/doc/docstrings/histplot.ipynb
+++ b/doc/docstrings/histplot.ipynb
@@ -103,7 +103,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "You can also draw multiple histograms from a long-form dataset with hue mapping:"
+    "You can otherwise draw multiple histograms from a long-form dataset with hue mapping:"
    ]
   },
   {
diff --git a/seaborn/distributions.py b/seaborn/distributions.py
index 4451d83ceb..4064921867 100644
--- a/seaborn/distributions.py
+++ b/seaborn/distributions.py
@@ -1146,8 +1146,11 @@ def plot_univariate_ecdf(self, estimate_kws, legend, plot_kws, ax):
             "hue", reverse=True, from_comp_data=True,
         ):
 
-            # "Compute" the ECDF
+            # Compute the ECDF
             sub_data = sub_data[cols].dropna()
+            if sub_data.empty:
+                continue
+
             observations = sub_data[self.data_variable]
             weights = sub_data.get("weights", None)
             stat, vals = estimator(observations, weights)
@@ -1928,6 +1931,11 @@ def ecdfplot(
 {seealso.rugplot}
 distplot
 
+Examples
+--------
+
+.. include:: ../docstrings/ecdfplot.rst
+
 """.format(
     params=_param_docs,
     returns=_core_docs["returns"],

From 5b4f3d27bf43860b9e3fd3d04c2b794aaaca1be3 Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@nyu.edu>
Date: Tue, 16 Jun 2020 10:38:00 -0400
Subject: [PATCH 08/11] Fix step plots with y data variable

---
 seaborn/distributions.py            | 13 +++++++++----
 seaborn/tests/test_distributions.py |  7 +++++++
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/seaborn/distributions.py b/seaborn/distributions.py
index 4064921867..05a9d1b883 100644
--- a/seaborn/distributions.py
+++ b/seaborn/distributions.py
@@ -540,8 +540,12 @@ def plot_univariate_histogram(
                     y = np.append(hist["heights"], final["heights"])
                     b = np.append(bottom, bottom[-1])
 
-                    step = "post"
-                    drawstyle = "steps-post"
+                    if self.data_variable == "x":
+                        step = "post"
+                        drawstyle = "steps-post"
+                    else:
+                        step = "post"  # fill_betweenx handles mapping internally
+                        drawstyle = "steps-pre"
 
                 elif element == "poly":
 
@@ -1138,8 +1142,9 @@ def plot_univariate_ecdf(self, estimate_kws, legend, plot_kws, ax):
         # TODO maybe have an option for joint start/end?
         estimator = ECDF(**estimate_kws)
 
-        # Allow other drawstyles (I'm not sure why you'd want them)
-        plot_kws.setdefault("drawstyle", "steps-post")
+        # Set the draw style to step the right way for the data varible
+        drawstyles = dict(x="steps-post", y="steps-pre")
+        plot_kws["drawstyle"] = drawstyles[self.data_variable]
 
         # Loop through the subsets, transform and plot the data
         for sub_vars, sub_data in self._semantic_subsets(
diff --git a/seaborn/tests/test_distributions.py b/seaborn/tests/test_distributions.py
index fec93dc1d0..2593eca382 100644
--- a/seaborn/tests/test_distributions.py
+++ b/seaborn/tests/test_distributions.py
@@ -1837,6 +1837,13 @@ def test_line_kwargs(self, long_df):
             assert line.get_linestyle() == ls
             assert line.get_linewidth() == lw
 
+    @pytest.mark.parametrize("data_var", ["x", "y"])
+    def test_drawstyle(self, flat_series, data_var):
+
+        ax = ecdfplot(**{data_var: flat_series})
+        drawstyles = dict(x="steps-post", y="steps-pre")
+        assert ax.lines[0].get_drawstyle() == drawstyles[data_var]
+
     @pytest.mark.parametrize(
         "data_var,stat_var", [["x", "y"], ["y", "x"]],
     )

From 69076caac6e21c0635a845b42c78041818cdfae2 Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@nyu.edu>
Date: Tue, 16 Jun 2020 10:57:16 -0400
Subject: [PATCH 09/11] Housekeeping

---
 seaborn/_statistics.py           | 11 ++++++++---
 seaborn/distributions.py         |  1 -
 seaborn/tests/test_statistics.py |  6 ++++++
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/seaborn/_statistics.py b/seaborn/_statistics.py
index f0c8f73a37..9f6b8dff88 100644
--- a/seaborn/_statistics.py
+++ b/seaborn/_statistics.py
@@ -390,7 +390,9 @@ def __init__(self, stat="proportion", complementary=False):
         self.stat = stat
         self.complementary = complementary
 
-    # Do we need bivariate ECDF?
+    def _eval_bivariate(self, x1, x2, weights):
+        """Inner function for ECDF of two variables."""
+        raise NotImplementedError
 
     def _eval_univariate(self, x, weights):
         """Inner function for ECDF of one variable."""
@@ -410,7 +412,7 @@ def _eval_univariate(self, x, weights):
 
         return y, x
 
-    def __call__(self, x1, weights=None):
+    def __call__(self, x1, x2=None, weights=None):
         """Return proportion or count of observations below each sorted datapoint."""
         x1 = np.asarray(x1)
         if weights is None:
@@ -418,4 +420,7 @@ def __call__(self, x1, weights=None):
         else:
             weights = np.asarray(weights)
 
-        return self._eval_univariate(x1, weights)
+        if x2 is None:
+            return self._eval_univariate(x1, weights)
+        else:
+            return self._eval_bivariate(x1, x2, weights)
diff --git a/seaborn/distributions.py b/seaborn/distributions.py
index 05a9d1b883..54d06a9afe 100644
--- a/seaborn/distributions.py
+++ b/seaborn/distributions.py
@@ -1139,7 +1139,6 @@ def plot_univariate_ecdf(self, estimate_kws, legend, plot_kws, ax):
         # TODO see notes elsewhere about GH2135
         cols = list(self.variables)
 
-        # TODO maybe have an option for joint start/end?
         estimator = ECDF(**estimate_kws)
 
         # Set the draw style to step the right way for the data varible
diff --git a/seaborn/tests/test_statistics.py b/seaborn/tests/test_statistics.py
index b6661ce7b3..4590849e3a 100644
--- a/seaborn/tests/test_statistics.py
+++ b/seaborn/tests/test_statistics.py
@@ -449,3 +449,9 @@ def test_invalid_stat(self, x):
 
         with pytest.raises(ValueError, match="`stat` must be one of"):
             ECDF(stat="density")
+
+    def test_bivariate_error(self, x, y):
+
+        with pytest.raises(NotImplementedError, match="Bivariate ECDF"):
+            ecdf = ECDF()
+            ecdf(x, y)

From 394669bcb560281467598e4c6b73ac457281bd8a Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@nyu.edu>
Date: Tue, 16 Jun 2020 11:16:12 -0400
Subject: [PATCH 10/11] Fix error message

---
 seaborn/_statistics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/seaborn/_statistics.py b/seaborn/_statistics.py
index 9f6b8dff88..40d5801364 100644
--- a/seaborn/_statistics.py
+++ b/seaborn/_statistics.py
@@ -392,7 +392,7 @@ def __init__(self, stat="proportion", complementary=False):
 
     def _eval_bivariate(self, x1, x2, weights):
         """Inner function for ECDF of two variables."""
-        raise NotImplementedError
+        raise NotImplementedError("Bivariate ECDF is not implemented")
 
     def _eval_univariate(self, x, weights):
         """Inner function for ECDF of one variable."""

From f8fc9152042427af70daba79c84f86d73ad98350 Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@nyu.edu>
Date: Tue, 16 Jun 2020 20:51:05 -0400
Subject: [PATCH 11/11] Mention ecdfplot in release notes

---
 doc/releases/v0.11.0.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/releases/v0.11.0.txt b/doc/releases/v0.11.0.txt
index 53c6690c5c..d6e24b693a 100644
--- a/doc/releases/v0.11.0.txt
+++ b/doc/releases/v0.11.0.txt
@@ -9,7 +9,9 @@ v0.11.0 (Unreleased)
 Modernization of distribution functions
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-First, a new function, :func:`histplot` has been added. :func:`histplot` draws univariate or bivariate histograms with a number of features, including:
+First, two new functions, :func:`histplot` and :func:`ecdfplot` have been added.
+
+:func:`histplot` draws univariate or bivariate histograms with a number of features, including:
 
 - mapping multiple distributions with a ``hue`` semantic
 - normalization to show density, probability, or frequency statistics
@@ -17,6 +19,8 @@ First, a new function, :func:`histplot` has been added. :func:`histplot` draws u
 - adding a KDE fit to show a smoothed distribution over all bin statistics
 - experimental support for histograms over categorical and datetime variables. GH2125
 
+:func:`ecdfplot` draws univariate empirical cumulative distribution functions, using a similar interface.
+
 Second, the existing functions :func:`kdeplot` and :func:`rugplot` have been completely overhauled. Two of the oldest functions in the library, these lacked aspects of the otherwise-common seaborn API, such as the ability to assign variables by name from a ``data`` object; they had no capacity for semantic mapping; and they had numerous other inconsistencies and smaller issues.
 
 The overhauled functions now share a common API with the rest of seaborn, they can show conditional distributions by mapping a third variable with a ``hue`` semantic, and have been improved in numerous other ways. The `github pull request (GH2104) <https://github.com/mwaskom/seaborn/pull/2104>`_ has a longer explanation of the changes and the motivation behind them.