From 5394b934ee6c00951c7d0f0ad0e0de76c071c76b Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Tue, 15 Dec 2020 20:46:11 -0500 Subject: [PATCH 1/4] Add/restore functionality to long-form data processing --- seaborn/_core.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/seaborn/_core.py b/seaborn/_core.py index 0531ba6336..22a159bcfc 100644 --- a/seaborn/_core.py +++ b/seaborn/_core.py @@ -871,20 +871,30 @@ def _assign_variables_longform(self, data=None, **kwargs): # The caller will determine the order of variables in plot_data for key, val in kwargs.items(): - if isinstance(val, (str, bytes)): - # String inputs trigger __getitem__ + # First try to treat the argument as a key for the data collection. + # But be flexible about what can be used as a key. + # Usually it will be a string, but allow numbers or tuples too. + try: + val_as_data_key = val in data or val in index + except (KeyError, TypeError): + val_as_data_key = False + + if val_as_data_key: + + # We know that __getitem__ will work + if val in data: - # First try to get an entry in the data object plot_data[key] = data[val] - variables[key] = val elif val in index: - # Failing that, try to get an entry in the index object plot_data[key] = index[val] - variables[key] = val - else: - # We don't know what this name means - err = f"Could not interpret value `{val}` for parameter `{key}`" - raise ValueError(err) + variables[key] = val + + elif isinstance(val, (str, bytes)): + + # This looks like a column name but we don't know what it means! 
+ + err = f"Could not interpret value `{val}` for parameter `{key}`" + raise ValueError(err) else: @@ -892,7 +902,7 @@ def _assign_variables_longform(self, data=None, **kwargs): # Raise when data is present and a vector can't be combined with it if isinstance(data, pd.DataFrame) and not isinstance(val, pd.Series): - if val is not None and len(data) != len(val): + if np.ndim(val) and len(data) != len(val): val_cls = val.__class__.__name__ err = ( f"Length of {val_cls} vectors must match length of `data`" From 961dbd5574149d6faf2663ae9b9bbc9838ea5a89 Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Wed, 16 Dec 2020 07:14:34 -0500 Subject: [PATCH 2/4] Add tests for restored functionality --- seaborn/_core.py | 4 ++-- seaborn/tests/test_core.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/seaborn/_core.py b/seaborn/_core.py index 22a159bcfc..11bff028c5 100644 --- a/seaborn/_core.py +++ b/seaborn/_core.py @@ -898,9 +898,9 @@ def _assign_variables_longform(self, data=None, **kwargs): else: - # Otherwise, assume the value is itself a vector of data + # Otherwise, assume the value is itself data - # Raise when data is present and a vector can't be combined with it + # Raise when data object is present and a vector can't be matched if isinstance(data, pd.DataFrame) and not isinstance(val, pd.Series): if np.ndim(val) and len(data) != len(val): val_cls = val.__class__.__name__ diff --git a/seaborn/tests/test_core.py b/seaborn/tests/test_core.py index d60ef6d9d2..e5a90c462f 100644 --- a/seaborn/tests/test_core.py +++ b/seaborn/tests/test_core.py @@ -605,6 +605,36 @@ def test_flat_variables(self, flat_data): # TODO note that most of the other tests that exercise the core # variable assignment code still live in test_relational + @pytest.mark.parametrize("name", [3, 4.5]) + def test_long_numeric_name(self, long_df, name): + + long_df[name] = long_df["x"] + p = VectorPlotter() + p.assign_variables(data=long_df, variables={"x": 
name}) + assert_array_equal(p.plot_data["x"], long_df[name]) + assert p.variables["x"] == name + + def test_long_hierarchical_index(self, rng): + + cols = pd.MultiIndex.from_product([["a"], ["x", "y"]]) + data = rng.uniform(size=(50, 2)) + df = pd.DataFrame(data, columns=cols) + + name = ("a", "y") + var = "y" + + p = VectorPlotter() + p.assign_variables(data=df, variables={var: name}) + assert_array_equal(p.plot_data[var], df[name]) + assert p.variables[var] == name + + def test_long_scalar_and_data(self, long_df): + + val = 22 + p = VectorPlotter(data=long_df, variables={"x": "x", "y": val}) + assert (p.plot_data["y"] == val).all() + assert p.variables["y"] is None + def test_wide_semantic_error(self, wide_df): err = "The following variable cannot be assigned with wide-form data: `hue`" From 82a830351cab7ae97e0bf8113dc05f5783b80c25 Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Wed, 16 Dec 2020 07:18:21 -0500 Subject: [PATCH 3/4] Update release notes --- doc/releases/v0.11.1.txt | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/releases/v0.11.1.txt b/doc/releases/v0.11.1.txt index 0f9c2fb0d3..49f884b8b0 100644 --- a/doc/releases/v0.11.1.txt +++ b/doc/releases/v0.11.1.txt @@ -2,13 +2,15 @@ v0.11.1 (Unreleased) -------------------- +- |Fix| Restored support for using tuples or numeric keys to reference fields in a long-form `data` object (:pr:`2386`). + - |Fix| Fixed a bug in :func:`lineplot` where NAs were propagating into the confidence interval, sometimes erasing it from the plot (:pr:`2273`). - |Fix| Fixed a bug in :class:`PairGrid`/:func:`pairplot` where diagonal axes would be empty when the grid was not square and the diagonal axes did not contain the marginal plots (:pr:`2270`). - |Fix| Fixed a bug in :class:`PairGrid`/:func:`pairplot` where off-diagonal plots would not appear when column names in `data` had non-string type (:pr:`2368`). 
-- |Fix| Fixed a bug where categorical dtype information was ignored when data consisted of boolean values (:pr:`2379`). +- |Fix| Fixed a bug where categorical dtype information was ignored when data consisted of boolean or boolean-like values (:pr:`2379`). - |Fix| Fixed a bug in :class:`FacetGrid` where interior tick labels would be hidden when only the orthogonal axis was shared (:pr:`2347`). @@ -18,12 +20,12 @@ v0.11.1 (Unreleased) - |Fix| Fixed a bug in :func:`displot` where the ``row_order`` and ``col_order`` parameters were not used (:pr:`2262`). -- |Fix| Fixed a bug in :class:`PairGrid`/:func:`pairplot` that caused an exception when using `corent=True` and `diag_kind=None` (:pr:`2382`). - -- |Fix| Raised a more informative error in :class:`PairGrid`/:func:`pairplot` when no variables cold be found to define the rows/columns of the grid (:func:`2382`). +- |Fix| Fixed a bug in :class:`PairGrid`/:func:`pairplot` that caused an exception when using `corner=True` and `diag_kind=None` (:pr:`2382`). - |Fix| Fixed a bug in :func:`clustermap` where `annot=False` was ignored (:pr:`2323`). - |Fix| Fixed a bug in :func:`boxenplot` where the `linewidth` parameter was ignored (:func:`2287`). -- |Fix| Raised a more informative error from :func:`clustermap` if row/col color objects have semantic index but data object does not (:pr:`2313`). \ No newline at end of file +- |Fix| Raise a more informative error in :class:`PairGrid`/:func:`pairplot` when no variables can be found to define the rows/columns of the grid (:pr:`2382`). + +- |Fix| Raise a more informative error from :func:`clustermap` if row/col color objects have semantic index but data object does not (:pr:`2313`). 
\ No newline at end of file From f7025f462d56c9f814586b680564fb1f36ee7a7e Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Thu, 17 Dec 2020 07:22:34 -0500 Subject: [PATCH 4/4] Require strings to access index fields --- seaborn/_core.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/seaborn/_core.py b/seaborn/_core.py index 11bff028c5..cccd50aef7 100644 --- a/seaborn/_core.py +++ b/seaborn/_core.py @@ -873,9 +873,14 @@ def _assign_variables_longform(self, data=None, **kwargs): # First try to treat the argument as a key for the data collection. # But be flexible about what can be used as a key. - # Usually it will be a string, but allow numbers or tuples too. + # Usually it will be a string, but allow numbers or tuples too when + # taking from the main data object. Only allow strings to reference + # fields in the index, because otherwise there is too much ambiguity. try: - val_as_data_key = val in data or val in index + val_as_data_key = ( + val in data + or (isinstance(val, (str, bytes)) and val in index) + ) except (KeyError, TypeError): val_as_data_key = False