From dc3032861197ea47aa03365f64665733e0f75f57 Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Mon, 1 Feb 2021 08:11:04 -0500 Subject: [PATCH 1/5] Fix multiple resolution when hue variable has no name Fixes #2452 (cherry picked from commit 008f7e0e030681f8047c7d628e6bb3fc3d23e6ff) --- doc/releases/v0.12.0.txt | 4 +++- seaborn/distributions.py | 26 +++++++++++++++++++------- seaborn/tests/test_distributions.py | 11 +++++++++++ 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/doc/releases/v0.12.0.txt b/doc/releases/v0.12.0.txt index 82ab2a9abf..8736df8b3a 100644 --- a/doc/releases/v0.12.0.txt +++ b/doc/releases/v0.12.0.txt @@ -18,13 +18,15 @@ v0.12.0 (Unreleased) - |Enhancement| In :func:`histplot`, added `stat="percent"` as an option for normalization such that bar heights sum to 100 (:pr:`2461`). +- |Enhancement| |Fix| Improved integration with the matplotlib color cycle in most axes-level functions (:pr:`2449`). + - |Fix| In :func:`lineplot, allowed the `dashes` keyword to set the style of a line without mapping a `style` variable (:pr:`2449`). - |Fix| In :func:`rugplot`, fixed a bug that prevented the use of datetime data (:pr:`2458`). - |Fix| In :func:`histplot` and :func:`kdeplot`, fixed a bug where the `alpha` parameter was ignored when `fill=False` (:pr:`2460`). -- |Fix| |Enhancement| Improved integration with the matplotlib color cycle in most axes-level functions (:pr:`2449`). +- |Fix| In :func:`histplot` and :func:`kdeplot`, fixed a bug where the `multiple` was ignored when `hue` was provided a vector without a name (:pr:`2462`). - Made `scipy` an optional dependency and added `pip install seaborn[all]` as a method for ensuring the availability of compatible `scipy` and `statsmodels` libraries at install time. This has a few minor implications for existing code, which are explained in the Github pull request (:pr:`2398`). diff --git a/seaborn/distributions.py b/seaborn/distributions.py index 65d862771a..b70e042970 100644 --- a/seaborn/distributions.py +++ b/seaborn/distributions.py @@ -132,6 +132,15 @@ def has_xy_data(self): # TODO see above points about where this should go return bool({"x", "y"} & set(self.variables)) + def _redundant_with_facets(self, var): + """Return True if var is assigned and is redundant with faceting variables.""" + # TODO likely generally useful but only used in this module for now. + return any( + self.plot_data[var].equals(self.plot_data[facet_var]) + for facet_var in ["row", "col"] + if var in self.plot_data and facet_var in self.plot_data + ) + def _add_legend( self, ax_obj, artist, fill, element, multiple, alpha, artist_kws, legend_kws, @@ -413,11 +422,8 @@ def plot_univariate_histogram( else: common_norm = False - # Turn multiple off if no hue or if hue exists but is redundant with faceting - facet_vars = [self.variables.get(var, None) for var in ["row", "col"]] - if "hue" not in self.variables: - multiple = None - elif self.variables["hue"] in facet_vars: + # Let default alpha look like single histogram if hue is redundant + if self._redundant_with_facets("hue"): multiple = None # Estimate the smoothed kernel densities, for use later @@ -886,6 +892,13 @@ def plot_univariate_density( # Input checking _check_argument("multiple", ["layer", "stack", "fill"], multiple) + # Let default alpha look like single density with redundant hue + if self._redundant_with_facets("hue"): + # Note unlike histplot, we set this to layer rather than None because + # default alpha for layer matches default alpha for a single density. + # But that's kind of messy and we should rethink. + multiple = "layer" + # Always share the evaluation grid when stacking subsets = bool(set(self.variables) - {"x", "y"}) if subsets and multiple in ("stack", "fill"): @@ -916,9 +929,8 @@ def plot_univariate_density( else: sticky_support = [] - # XXX unfilled kdeplot is ignoring if fill: - default_alpha = .25 if multiple == "layer" else .75 + default_alpha = .25 if multiple in "layer" else .75 else: default_alpha = 1 alpha = plot_kws.pop("alpha", default_alpha) # TODO make parameter? diff --git a/seaborn/tests/test_distributions.py b/seaborn/tests/test_distributions.py index a2f8628e0e..4ab3dfd990 100644 --- a/seaborn/tests/test_distributions.py +++ b/seaborn/tests/test_distributions.py @@ -1217,6 +1217,17 @@ def test_hue_dodge(self, long_df): assert_array_almost_equal(layer_xs[1], dodge_xs[1]) assert_array_almost_equal(layer_xs[0], dodge_xs[0] - bw / 2) + def test_hue_as_numpy_dodged(self, long_df): + # https://github.com/mwaskom/seaborn/issues/2452 + + ax = histplot( + long_df, + x="y", hue=long_df["a"].to_numpy(), + multiple="dodge", bins=1, + ) + # Note hue order reversal + assert ax.patches[1].get_x() < ax.patches[0].get_x() + def test_multiple_input_check(self, flat_series): with pytest.raises(ValueError, match="`multiple` must be"): From d30509db7c6bace2dbadfff14c050e7f0c324ca0 Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Mon, 1 Feb 2021 11:22:58 -0500 Subject: [PATCH 2/5] Remove unused kwarg --- doc/tutorial/distributions.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/tutorial/distributions.ipynb b/doc/tutorial/distributions.ipynb index 843ee9bb99..59452c5a13 100644 --- a/doc/tutorial/distributions.ipynb +++ b/doc/tutorial/distributions.ipynb @@ -245,7 +245,7 @@ "metadata": {}, "outputs": [], "source": [ - "sns.displot(penguins, x=\"flipper_length_mm\", col=\"sex\", multiple=\"dodge\")" + "sns.displot(penguins, x=\"flipper_length_mm\", col=\"sex\")" ] }, { From dbceeafde2238c464102d6555cf8c57ba12f59fe Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Mon, 1 Feb 2021 13:23:59 -0500 Subject: [PATCH 3/5] Don't try to resolve multiples if there aren't any --- seaborn/distributions.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/seaborn/distributions.py b/seaborn/distributions.py index b70e042970..60a2a69002 100644 --- a/seaborn/distributions.py +++ b/seaborn/distributions.py @@ -234,8 +234,16 @@ def _default_discrete(self): return discrete def _resolve_multiple(self, curves, multiple): + """Modify the density data structure to handle multiple densities.""" + + # Default baselines have all densities starting at 0 + baselines = {k: np.zeros_like(v) for k, v in curves.items()} + + # TODO we should have some central clearinghouse for checking if any + # "grouping" (terminnology?) semantics have been assigned + if "hue" not in self.variables: + return curves, baselines - # Modify the density data structure to handle multiple densities if multiple in ("stack", "fill"): # Setting stack or fill means that the curves share a @@ -270,11 +278,6 @@ def _resolve_multiple(self, curves, multiple): .shift(1, axis=1) .fillna(0)) - else: - - # All densities will start at 0 - baselines = {k: np.zeros_like(v) for k, v in curves.items()} - if multiple == "dodge": # Account for the unique semantic (non-faceting) levels @@ -916,7 +919,7 @@ def plot_univariate_density( log_scale, ) - # Note: raises when no hue and multiple != layer. A problem? + # Adjust densities based on the `multiple` rule densities, baselines = self._resolve_multiple(densities, multiple) # Control the interaction with autoscaling by defining sticky_edges @@ -930,7 +933,7 @@ def plot_univariate_density( sticky_support = [] if fill: - default_alpha = .25 if multiple in "layer" else .75 + default_alpha = .25 if multiple == "layer" else .75 else: default_alpha = 1 alpha = plot_kws.pop("alpha", default_alpha) # TODO make parameter? From b9dbfd433fe1fc3ab1d3f9afd8a59a6b7b5c30c4 Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Tue, 2 Feb 2021 20:12:06 -0500 Subject: [PATCH 4/5] Simplify logic that ignored multiple when no hue was assigned --- doc/releases/v0.12.0.txt | 2 ++ seaborn/distributions.py | 29 +++++++---------------------- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/doc/releases/v0.12.0.txt b/doc/releases/v0.12.0.txt index 8736df8b3a..da3520b8a0 100644 --- a/doc/releases/v0.12.0.txt +++ b/doc/releases/v0.12.0.txt @@ -28,6 +28,8 @@ v0.12.0 (Unreleased) - |Fix| In :func:`histplot` and :func:`kdeplot`, fixed a bug where the `multiple` was ignored when `hue` was provided a vector without a name (:pr:`2462`). + - |Defaults| In :func:`displot`, the default alpha value now adjusts to a provided `multiple` parameter even when `hue` is not assigned (:pr:`2462`). + - Made `scipy` an optional dependency and added `pip install seaborn[all]` as a method for ensuring the availability of compatible `scipy` and `statsmodels` libraries at install time. This has a few minor implications for existing code, which are explained in the Github pull request (:pr:`2398`). - Following `NEP29 `_, dropped support for Python 3.6 and bumped the minimally-supported versions of the library dependencies. diff --git a/seaborn/distributions.py b/seaborn/distributions.py index 60a2a69002..e9392e442b 100644 --- a/seaborn/distributions.py +++ b/seaborn/distributions.py @@ -132,15 +132,6 @@ def has_xy_data(self): # TODO see above points about where this should go return bool({"x", "y"} & set(self.variables)) - def _redundant_with_facets(self, var): - """Return True if var is assigned and is redundant with faceting variables.""" - # TODO likely generally useful but only used in this module for now. - return any( - self.plot_data[var].equals(self.plot_data[facet_var]) - for facet_var in ["row", "col"] - if var in self.plot_data and facet_var in self.plot_data - ) - def _add_legend( self, ax_obj, artist, fill, element, multiple, alpha, artist_kws, legend_kws, @@ -425,10 +416,6 @@ def plot_univariate_histogram( else: common_norm = False - # Let default alpha look like single histogram if hue is redundant - if self._redundant_with_facets("hue"): - multiple = None - # Estimate the smoothed kernel densities, for use later if kde: # TODO alternatively, clip at min/max bins? @@ -512,7 +499,8 @@ def plot_univariate_histogram( # Default alpha should depend on other parameters if fill: - if multiple == "layer": + # Note: will need to account for other grouping semantics if added + if "hue" in self.variables and multiple == "layer": default_alpha = .5 if element == "bars" else .25 elif kde: default_alpha = .5 @@ -895,13 +883,6 @@ def plot_univariate_density( # Input checking _check_argument("multiple", ["layer", "stack", "fill"], multiple) - # Let default alpha look like single density with redundant hue - if self._redundant_with_facets("hue"): - # Note unlike histplot, we set this to layer rather than None because - # default alpha for layer matches default alpha for a single density. - # But that's kind of messy and we should rethink. - multiple = "layer" - # Always share the evaluation grid when stacking subsets = bool(set(self.variables) - {"x", "y"}) if subsets and multiple in ("stack", "fill"): @@ -933,7 +914,11 @@ def plot_univariate_density( sticky_support = [] if fill: - default_alpha = .25 if multiple == "layer" else .75 + # Note: will need to account for other grouping semantics if added + if multiple == "layer": + default_alpha = .25 + else: + default_alpha = .75 else: default_alpha = 1 alpha = plot_kws.pop("alpha", default_alpha) # TODO make parameter? From ab70c30edaaab180027b3f70052bbc006024c972 Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Tue, 2 Feb 2021 20:43:00 -0500 Subject: [PATCH 5/5] Fix typo and remove outdated comment --- doc/releases/v0.12.0.txt | 2 +- seaborn/distributions.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/releases/v0.12.0.txt b/doc/releases/v0.12.0.txt index da3520b8a0..6ae8da68d7 100644 --- a/doc/releases/v0.12.0.txt +++ b/doc/releases/v0.12.0.txt @@ -26,7 +26,7 @@ v0.12.0 (Unreleased) - |Fix| In :func:`histplot` and :func:`kdeplot`, fixed a bug where the `alpha` parameter was ignored when `fill=False` (:pr:`2460`). -- |Fix| In :func:`histplot` and :func:`kdeplot`, fixed a bug where the `multiple` was ignored when `hue` was provided a vector without a name (:pr:`2462`). +- |Fix| In :func:`histplot` and :func:`kdeplot`, fixed a bug where the `multiple` was ignored when `hue` was provided as a vector without a name (:pr:`2462`). - |Defaults| In :func:`displot`, the default alpha value now adjusts to a provided `multiple` parameter even when `hue` is not assigned (:pr:`2462`). diff --git a/seaborn/distributions.py b/seaborn/distributions.py index e9392e442b..0b07d68761 100644 --- a/seaborn/distributions.py +++ b/seaborn/distributions.py @@ -914,7 +914,6 @@ def plot_univariate_density( sticky_support = [] if fill: - # Note: will need to account for other grouping semantics if added if multiple == "layer": default_alpha = .25 else: