From e8d4fb007479979317b6899d56ee274c619283ed Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Tue, 19 Jul 2022 21:10:15 -0400 Subject: [PATCH 1/7] Make scale config apply to variables computed by the stat transform WIP Fixes #2908, #2885 --- seaborn/_core/plot.py | 70 ++++++++++++++++++++++++---------------- tests/_core/test_plot.py | 26 +++++++++++++++ 2 files changed, 69 insertions(+), 27 deletions(-) diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index b74bb2aa24..c1d2a73c51 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -967,26 +967,9 @@ def _transform_coords(self, p: Plot, common: PlotData, layers: list[Layer]) -> N transformed_data.append(pd.Series(dtype=float, index=index, name=var)) for view in subplots: - axis_obj = getattr(view["ax"], f"{axis}axis") - - if share_state in [True, "all"]: - # The all-shared case is easiest, every subplot sees all the data - seed_values = var_df[var] - else: - # Otherwise, we need to setup separate scales for different subplots - if share_state in [False, "none"]: - # Fully independent axes are also easy: use each subplot's data - idx = self._get_subplot_index(var_df, view) - elif share_state in var_df: - # Sharing within row/col is more complicated - use_rows = var_df[share_state] == view[share_state] - idx = var_df.index[use_rows] - else: - # This configuration doesn't make much sense, but it's fine - idx = var_df.index - - seed_values = var_df.loc[idx, var] + axis_obj = getattr(view["ax"], f"{axis}axis") + seed_values = self._get_subplot_data(var_df, var, view, share_state) scale = scale._setup(seed_values, prop, axis=axis_obj) for layer, new_series in zip(layers, transformed_data): @@ -1073,6 +1056,28 @@ def _get_scale( return scale + def _get_subplot_data(self, df, var, view, share_state): + + if share_state in [True, "all"]: + # The all-shared case is easiest, every subplot sees all the data + seed_values = df[var] + else: + # Otherwise, we need to setup separate scales for different subplots + if share_state in [False, "none"]: + # Fully independent axes are also easy: use each subplot's data + idx = self._get_subplot_index(df, view) + elif share_state in df: + # Sharing within row/col is more complicated + use_rows = df[share_state] == view[share_state] + idx = df.index[use_rows] + else: + # This configuration doesn't make much sense, but it's fine + idx = df.index + + seed_values = df.loc[idx, var] + + return seed_values + def _setup_scales(self, p: Plot, layers: list[Layer]) -> None: # Identify all of the variables that will be used at some point in the plot @@ -1091,16 +1096,19 @@ def _setup_scales(self, p: Plot, layers: list[Layer]) -> None: continue # Get the data all the distinct appearances of this variable. 
+ cols = [var, "col", "row"] parts = [] for layer in layers: if layer["data"].frame.empty and layer["data"].frames: for df in layer["data"].frames.values(): - parts.append(df.get(var)) + parts.append(df.filter(cols)) else: - parts.append(layer["data"].frame.get(var)) - var_values = pd.concat( - parts, axis=0, join="inner", ignore_index=True - ).rename(var) + parts.append(layer["data"].frame.filter(cols)) + + if parts: + var_df = pd.concat(parts, ignore_index=True) + else: + var_df = pd.DataFrame(columns=cols) # Determine whether this is an coordinate variable # (i.e., x/y, paired x/y, or derivative such as xmax) @@ -1112,7 +1120,7 @@ def _setup_scales(self, p: Plot, layers: list[Layer]) -> None: axis = m["axis"] prop = PROPERTIES.get(var if axis is None else axis, Property()) - scale = self._get_scale(p, var, prop, var_values) + scale = self._get_scale(p, var, prop, var_df[var]) # Initialize the data-dependent parameters of the scale # Note that this returns a copy and does not mutate the original @@ -1127,13 +1135,21 @@ def _setup_scales(self, p: Plot, layers: list[Layer]) -> None: # doesn't make sense or is poorly defined, since we don't use pixels.) self._scales[var] = Scale._identity() else: - scale = scale._setup(var_values, prop) if isinstance(prop, Coordinate): # If we have a coordinate here, we didn't assign a scale for it # in _transform_coords, which means it was added during compute_stat # This allows downstream orientation inference to work properly. # But it feels a little hacky, so perhaps revisit. - scale._priority = 0 # type: ignore + share = self._subplots.subplot_spec[f"share{axis}"] + views = [view for view in self._subplots if view[axis] == var] + for view in views: + axis_obj = getattr(view["ax"], f"{axis}axis") + seed_values = self._get_subplot_data(var_df, var, view, share) + scale = scale._setup(seed_values, prop, axis=axis_obj) + set_scale_obj(view["ax"], axis, scale._matplotlib_scale) + + scale = scale._setup(var_df[var], prop) + scale._priority = 0 # type: ignore self._scales[var] = scale def _plot_layer(self, p: Plot, layer: Layer) -> None: diff --git a/tests/_core/test_plot.py b/tests/_core/test_plot.py index 093e7e2447..ccb319d7a9 100644 --- a/tests/_core/test_plot.py +++ b/tests/_core/test_plot.py @@ -2,6 +2,7 @@ import itertools import warnings import imghdr +from xml.dom.minidom import Identified import numpy as np import pandas as pd @@ -439,6 +440,31 @@ def test_mark_data_from_datetime(self, long_df): assert_vector_equal(m.passed_data[0]["x"], expected) + def test_computed_var_ticks(self, long_df): + + class Identity(Stat): + def __call__(self, df, groupby, orient, scales): + other = {"x": "y", "y": "x"}[orient] + return df.assign(**{other: df[orient]}) + + tick_locs = [1, 2, 5] + scale = Continuous().tick(at=tick_locs) + p = Plot(long_df, "x").add(MockMark(), Identity()).scale(y=scale).plot() + ax = p._figure.axes[0] + assert_array_equal(ax.get_yticks(), tick_locs) + + def test_computed_var_transform(self, long_df): + + class Identity(Stat): + def __call__(self, df, groupby, orient, scales): + other = {"x": "y", "y": "x"}[orient] + return df.assign(**{other: df[orient]}) + + p = Plot(long_df, "x").add(MockMark(), Identity()).scale(y="log").plot() + ax = p._figure.axes[0] + xfm = ax.yaxis.get_transform().transform + assert_array_equal(xfm([1, 10, 100]), [0, 1, 2]) + def test_facet_categories(self): m = MockMark() From dd104a333a952ea8e8ee0b11e4cacbf2b82a7d49 Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Tue, 19 Jul 2022 22:20:52 -0400 Subject: 
[PATCH 2/7] Don't try to scale faceting variables --- seaborn/_core/plot.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index c1d2a73c51..9c1aed036f 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -1091,8 +1091,13 @@ def _setup_scales(self, p: Plot, layers: list[Layer]) -> None: for var in variables: + if var in ["col", "row"]: + # There's not currently a concept of "scale" for faceting variables + continue + if var in self._scales: - # Scales for coordinate variables added in _transform_coords + # Scales for coordinate variables have already been added + # in _transform_coords so there's nothing to do here continue # Get the data all the distinct appearances of this variable. From eab85c9b193e9d32a04eb03dfcc82aef2f772bee Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Wed, 20 Jul 2022 20:29:54 -0400 Subject: [PATCH 3/7] Further backcompat --- seaborn/_compat.py | 2 ++ seaborn/_core/plot.py | 10 ++++++---- tests/_core/test_plot.py | 1 - 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/seaborn/_compat.py b/seaborn/_compat.py index 44f409b231..918eb69c7d 100644 --- a/seaborn/_compat.py +++ b/seaborn/_compat.py @@ -121,5 +121,7 @@ def set_scale_obj(ax, axis, scale): trans = scale.get_transform() kws["functions"] = (trans._forward, trans._inverse) method(scale.name, **kws) + axis_obj = getattr(ax, f"{axis}axis") + scale.set_default_locators_and_formatters(axis_obj) else: ax.set(**{f"{axis}scale": scale}) diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index 9c1aed036f..06a523aab2 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -1143,18 +1143,20 @@ def _setup_scales(self, p: Plot, layers: list[Layer]) -> None: if isinstance(prop, Coordinate): # If we have a coordinate here, we didn't assign a scale for it # in _transform_coords, which means it was added during compute_stat - # This allows downstream orientation inference to work properly. - # But it feels a little hacky, so perhaps revisit. share = self._subplots.subplot_spec[f"share{axis}"] views = [view for view in self._subplots if view[axis] == var] for view in views: axis_obj = getattr(view["ax"], f"{axis}axis") seed_values = self._get_subplot_data(var_df, var, view, share) - scale = scale._setup(seed_values, prop, axis=axis_obj) - set_scale_obj(view["ax"], axis, scale._matplotlib_scale) + view_scale = scale._setup(seed_values, prop, axis=axis_obj) + set_scale_obj(view["ax"], axis, view_scale._matplotlib_scale) scale = scale._setup(var_df[var], prop) + + # This allows downstream orientation inference to work properly. + # But it feels a little hacky, so perhaps revisit. 
scale._priority = 0 # type: ignore + self._scales[var] = scale def _plot_layer(self, p: Plot, layer: Layer) -> None: diff --git a/tests/_core/test_plot.py b/tests/_core/test_plot.py index ccb319d7a9..94876c4014 100644 --- a/tests/_core/test_plot.py +++ b/tests/_core/test_plot.py @@ -2,7 +2,6 @@ import itertools import warnings import imghdr -from xml.dom.minidom import Identified import numpy as np import pandas as pd From 9188812d6924e00e0a530820874f81fe6ba998ff Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Wed, 20 Jul 2022 22:41:35 -0400 Subject: [PATCH 4/7] Remove unreachable line --- seaborn/_core/plot.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index 06a523aab2..253480ff63 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -1110,10 +1110,7 @@ def _setup_scales(self, p: Plot, layers: list[Layer]) -> None: else: parts.append(layer["data"].frame.filter(cols)) - if parts: - var_df = pd.concat(parts, ignore_index=True) - else: - var_df = pd.DataFrame(columns=cols) + var_df = pd.concat(parts, ignore_index=True) # Determine whether this is an coordinate variable # (i.e., x/y, paired x/y, or derivative such as xmax) From 4214d6e85116a52fab5c4c6ca85cf009d19fb59a Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Fri, 22 Jul 2022 20:44:35 -0400 Subject: [PATCH 5/7] Consolidate _transform_coords and _setup_scales --- seaborn/_core/plot.py | 237 ++++++++++++++++-------------------------- 1 file changed, 91 insertions(+), 146 deletions(-) diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index 253480ff63..88a57bb97b 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -26,7 +26,7 @@ from seaborn._core.scales import Scale from seaborn._core.subplots import Subplots from seaborn._core.groupby import GroupBy -from seaborn._core.properties import PROPERTIES, Property, Coordinate +from seaborn._core.properties import PROPERTIES, Property from seaborn._core.typing import DataSource, VariableSpec, OrderSpec from seaborn._core.rules import categorical_order from seaborn._compat import set_scale_obj @@ -664,10 +664,21 @@ def plot(self, pyplot=False) -> Plotter: common, layers = plotter._extract_data(self) plotter._setup_figure(self, common, layers) - plotter._transform_coords(self, common, layers) + + coord_vars = [v for v in self._variables if re.match(r"^x|y", v)] + plotter._setup_scales(self, common, layers, coord_vars) plotter._compute_stats(self, layers) - plotter._setup_scales(self, layers) + + other_vars = set() # TODO move this into a method + for layer in layers: + if layer["data"].frame.empty and layer["data"].frames: + for df in layer["data"].frames.values(): + other_vars.update(df.columns) + else: + other_vars.update(layer["data"].frame.columns) + other_vars -= set(coord_vars) + plotter._setup_scales(self, common, layers, list(other_vars)) # TODO Remove these after updating other methods # ---- Maybe have debug= param that attaches these when True? @@ -898,95 +909,6 @@ def _setup_figure(self, p: Plot, common: PlotData, layers: list[Layer]) -> None: title_text = ax.set_title(title) title_text.set_visible(show_title) - def _transform_coords(self, p: Plot, common: PlotData, layers: list[Layer]) -> None: - - for var in p._variables: - - # Parse name to identify variable (x, y, xmin, etc.) and axis (x/y) - # TODO should we have xmin0/xmin1 or x0min/x1min? 
- m = re.match(r"^(?P(?P[x|y])\d*).*", var) - - if m is None: - continue - - prefix = m["prefix"] - axis = m["axis"] - - share_state = self._subplots.subplot_spec[f"share{axis}"] - - # Concatenate layers, using only the relevant coordinate and faceting vars, - # This is unnecessarily wasteful, as layer data will often be redundant. - # But figuring out the minimal amount we need is more complicated. - cols = [var, "col", "row"] - # TODO basically copied from _setup_scales, and very clumsy - layer_values = [common.frame.filter(cols)] - for layer in layers: - if layer["data"].frame is None: - for df in layer["data"].frames.values(): - layer_values.append(df.filter(cols)) - else: - layer_values.append(layer["data"].frame.filter(cols)) - - if layer_values: - var_df = pd.concat(layer_values, ignore_index=True) - else: - var_df = pd.DataFrame(columns=cols) - - prop = Coordinate(axis) - scale = self._get_scale(p, prefix, prop, var_df[var]) - - # Shared categorical axes are broken on matplotlib<3.4.0. - # https://github.com/matplotlib/matplotlib/pull/18308 - # This only affects us when sharing *paired* axes. This is a novel/niche - # behavior, so we will raise rather than hack together a workaround. - if Version(mpl.__version__) < Version("3.4.0"): - from seaborn._core.scales import Nominal - paired_axis = axis in p._pair_spec - cat_scale = isinstance(scale, Nominal) - ok_dim = {"x": "col", "y": "row"}[axis] - shared_axes = share_state not in [False, "none", ok_dim] - if paired_axis and cat_scale and shared_axes: - err = "Sharing paired categorical axes requires matplotlib>=3.4.0" - raise RuntimeError(err) - - # Now loop through each subplot, deriving the relevant seed data to setup - # the scale (so that axis units / categories are initialized properly) - # And then scale the data in each layer. - subplots = [view for view in self._subplots if view[axis] == prefix] - - # Setup the scale on all of the data and plug it into self._scales - # We do this because by the time we do self._setup_scales, coordinate data - # will have been converted to floats already, so scale inference fails - self._scales[var] = scale._setup(var_df[var], prop) - - # Set up an empty series to receive the transformed values. - # We need this to handle piecemeal transforms of categories -> floats. 
- transformed_data = [] - for layer in layers: - index = layer["data"].frame.index - transformed_data.append(pd.Series(dtype=float, index=index, name=var)) - - for view in subplots: - - axis_obj = getattr(view["ax"], f"{axis}axis") - seed_values = self._get_subplot_data(var_df, var, view, share_state) - scale = scale._setup(seed_values, prop, axis=axis_obj) - - for layer, new_series in zip(layers, transformed_data): - layer_df = layer["data"].frame - if var in layer_df: - idx = self._get_subplot_index(layer_df, view) - new_series.loc[idx] = scale(layer_df.loc[idx, var]) - - # TODO need decision about whether to do this or modify axis transform - set_scale_obj(view["ax"], axis, scale._matplotlib_scale) - - # Now the transformed data series are complete, set update the layer data - for layer, new_series in zip(layers, transformed_data): - layer_df = layer["data"].frame - if var in layer_df: - layer_df[var] = new_series - def _compute_stats(self, spec: Plot, layers: list[Layer]) -> None: grouping_vars = [v for v in PROPERTIES if v not in "xy"] @@ -1078,83 +1000,106 @@ def _get_subplot_data(self, df, var, view, share_state): return seed_values - def _setup_scales(self, p: Plot, layers: list[Layer]) -> None: - - # Identify all of the variables that will be used at some point in the plot - variables = set() - for layer in layers: - if layer["data"].frame.empty and layer["data"].frames: - for df in layer["data"].frames.values(): - variables.update(df.columns) - else: - variables.update(layer["data"].frame.columns) + def _setup_scales( + self, p: Plot, common: PlotData, layers: list[Layer], variables: list[str], + ) -> None: for var in variables: - if var in ["col", "row"]: - # There's not currently a concept of "scale" for faceting variables - continue + # Determine whether this is an coordinate variable + # (i.e., x/y, paired x/y, or derivative such as xmax) + m = re.match(r"^(?P(?Px|y)\d*).*", var) + if m is None: + axis = None + else: + var = m["prefix"] + axis = m["axis"] - if var in self._scales: - # Scales for coordinate variables have already been added - # in _transform_coords so there's nothing to do here + prop_name = var if axis is None else axis + if prop_name not in PROPERTIES: + # Filters out facet vars, maybe others? continue - # Get the data all the distinct appearances of this variable. + # Concatenate layers, using only the relevant coordinate and faceting vars, + # This is unnecessarily wasteful, as layer data will often be redundant. + # But figuring out the minimal amount we need is more complicated. cols = [var, "col", "row"] - parts = [] + parts = [common.frame.filter(cols)] for layer in layers: if layer["data"].frame.empty and layer["data"].frames: for df in layer["data"].frames.values(): parts.append(df.filter(cols)) else: parts.append(layer["data"].frame.filter(cols)) - var_df = pd.concat(parts, ignore_index=True) - # Determine whether this is an coordinate variable - # (i.e., x/y, paired x/y, or derivative such as xmax) - m = re.match(r"^(?P(?Px|y)\d*).*", var) - if m is None: - axis = None + prop = PROPERTIES[prop_name] + scale = self._get_scale(p, var, prop, var_df[var]) + + if var not in p._variables: + # This allows downstream orientation inference to work properly. + # But it feels a little hacky, so perhaps revisit. + scale._priority = 0 # type: ignore + + if axis is None: + # We could think about having a broader concept of shared properties + # In general, not something you want to do (different scales in facets) + # But could make sense e.g. with paired plots. 
Build later. + share_state = None + subplots = [] else: - var = m["prefix"] - axis = m["axis"] + share_state = self._subplots.subplot_spec[f"share{axis}"] + subplots = [view for view in self._subplots if view[axis] == var] - prop = PROPERTIES.get(var if axis is None else axis, Property()) - scale = self._get_scale(p, var, prop, var_df[var]) + # Shared categorical axes are broken on matplotlib<3.4.0. + # https://github.com/matplotlib/matplotlib/pull/18308 + # This only affects us when sharing *paired* axes. This is a novel/niche + # behavior, so we will raise rather than hack together a workaround. + if axis is not None and Version(mpl.__version__) < Version("3.4.0"): + from seaborn._core.scales import Nominal + paired_axis = axis in p._pair_spec + cat_scale = isinstance(scale, Nominal) + ok_dim = {"x": "col", "y": "row"}[axis] + shared_axes = share_state not in [False, "none", ok_dim] + if paired_axis and cat_scale and shared_axes: + err = "Sharing paired categorical axes requires matplotlib>=3.4.0" + raise RuntimeError(err) - # Initialize the data-dependent parameters of the scale - # Note that this returns a copy and does not mutate the original - # This dictionary is used by the semantic mappings if scale is None: - # TODO what is the cleanest way to implement identity scale? - # We don't really need a Scale, and Identity() will be - # overloaded anyway (but maybe a general Identity object - # that can be used as Scale/Mark/Stat/Move?) - # Note that this may not be the right spacer to use - # (but that is only relevant for coordinates, where identity scale - # doesn't make sense or is poorly defined, since we don't use pixels.) self._scales[var] = Scale._identity() else: - if isinstance(prop, Coordinate): - # If we have a coordinate here, we didn't assign a scale for it - # in _transform_coords, which means it was added during compute_stat - share = self._subplots.subplot_spec[f"share{axis}"] - views = [view for view in self._subplots if view[axis] == var] - for view in views: - axis_obj = getattr(view["ax"], f"{axis}axis") - seed_values = self._get_subplot_data(var_df, var, view, share) - view_scale = scale._setup(seed_values, prop, axis=axis_obj) - set_scale_obj(view["ax"], axis, view_scale._matplotlib_scale) - - scale = scale._setup(var_df[var], prop) + self._scales[var] = scale._setup(var_df[var], prop) - # This allows downstream orientation inference to work properly. - # But it feels a little hacky, so perhaps revisit. - scale._priority = 0 # type: ignore + # Eveything below here applies only to coordinate variables + if axis is None: + continue - self._scales[var] = scale + # Set up an empty series to receive the transformed values. + # We need this to handle piecemeal transforms of categories -> floats. 
+ transformed_data = [] + for layer in layers: + index = layer["data"].frame.index + empty_series = pd.Series(dtype=float, index=index, name=var) + transformed_data.append(empty_series) + + for view in subplots: + + axis_obj = getattr(view["ax"], f"{axis}axis") + seed_values = self._get_subplot_data(var_df, var, view, share_state) + view_scale = scale._setup(seed_values, prop, axis=axis_obj) + set_scale_obj(view["ax"], axis, view_scale._matplotlib_scale) + + for layer, new_series in zip(layers, transformed_data): + layer_df = layer["data"].frame + if var in layer_df: + idx = self._get_subplot_index(layer_df, view) + new_series.loc[idx] = view_scale(layer_df.loc[idx, var]) + + # Now the transformed data series are complete, set update the layer data + for layer, new_series in zip(layers, transformed_data): + layer_df = layer["data"].frame + if var in layer_df: + layer_df[var] = new_series def _plot_layer(self, p: Plot, layer: Layer) -> None: From e5dc350b5d231833f10f7c836340f629e95e0742 Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Sat, 23 Jul 2022 18:12:52 -0400 Subject: [PATCH 6/7] Improve handling of scaling edge cases --- seaborn/_core/plot.py | 39 +++++++++++++++++++++++---------------- tests/_core/test_plot.py | 21 +++++++++++++++++++++ 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index 88a57bb97b..277b14dd16 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -1006,18 +1006,20 @@ def _setup_scales( for var in variables: - # Determine whether this is an coordinate variable + # Determine whether this is a coordinate variable # (i.e., x/y, paired x/y, or derivative such as xmax) - m = re.match(r"^(?P(?Px|y)\d*).*", var) + m = re.match(r"^(?P(?Px|y)\d*).*", var) if m is None: - axis = None + coord = axis = None else: - var = m["prefix"] + coord = m["coord"] axis = m["axis"] - prop_name = var if axis is None else axis - if prop_name not in PROPERTIES: - # Filters out facet vars, maybe others? + # Get keys that handle things like x0, xmax, properly where relevant + prop_key = var if axis is None else axis + scale_key = var if coord is None else coord + + if prop_key not in PROPERTIES: continue # Concatenate layers, using only the relevant coordinate and faceting vars, @@ -1033,23 +1035,24 @@ def _setup_scales( parts.append(layer["data"].frame.filter(cols)) var_df = pd.concat(parts, ignore_index=True) - prop = PROPERTIES[prop_name] - scale = self._get_scale(p, var, prop, var_df[var]) + prop = PROPERTIES[prop_key] + scale = self._get_scale(p, scale_key, prop, var_df[var]) - if var not in p._variables: - # This allows downstream orientation inference to work properly. - # But it feels a little hacky, so perhaps revisit. + if scale_key not in p._variables: + # TODO this implies that the variable was added by the stat + # It allows downstream orientation inference to work properly. + # But it feels rather hacky, so ideally revisit. scale._priority = 0 # type: ignore if axis is None: - # We could think about having a broader concept of shared properties + # We could think about having a broader concept of (un)shared properties # In general, not something you want to do (different scales in facets) # But could make sense e.g. with paired plots. Build later. 
share_state = None subplots = [] else: share_state = self._subplots.subplot_spec[f"share{axis}"] - subplots = [view for view in self._subplots if view[axis] == var] + subplots = [view for view in self._subplots if view[axis] == coord] # Shared categorical axes are broken on matplotlib<3.4.0. # https://github.com/matplotlib/matplotlib/pull/18308 @@ -1070,8 +1073,12 @@ def _setup_scales( else: self._scales[var] = scale._setup(var_df[var], prop) - # Eveything below here applies only to coordinate variables - if axis is None: + # Everything below here applies only to coordinate variables + # We additionally skip it when we're working with a value + # that is derived from a coordinate we've already processed. + # e.g., the Stat consumed y and added ymin/ymax. In that case, + # we've already setup the y scale and ymin/max are in scale space. + if axis is None or (var != coord and coord in p._variables): continue # Set up an empty series to receive the transformed values. diff --git a/tests/_core/test_plot.py b/tests/_core/test_plot.py index 94876c4014..a05ff22c05 100644 --- a/tests/_core/test_plot.py +++ b/tests/_core/test_plot.py @@ -464,6 +464,27 @@ def __call__(self, df, groupby, orient, scales): xfm = ax.yaxis.get_transform().transform assert_array_equal(xfm([1, 10, 100]), [0, 1, 2]) + def test_explicit_range_with_axis_scaling(self): + + x = [1, 2, 3] + ymin = [10, 100, 1000] + ymax = [20, 200, 2000] + m = MockMark() + Plot(x=x, ymin=ymin, ymax=ymax).add(m).scale(y="log").plot() + assert_vector_equal(m.passed_data[0]["ymax"], pd.Series(ymax, dtype=float)) + + def test_derived_range_with_axis_scaling(self): + + class AddOne(Stat): + def __call__(self, df, *args): + return df.assign(ymax=df["y"] + 1) + + x = y = [1, 10, 100] + + m = MockMark() + Plot(x, y).add(m, AddOne()).scale(y="log").plot() + assert_vector_equal(m.passed_data[0]["ymax"], pd.Series([10., 100., 1000.])) + def test_facet_categories(self): m = MockMark() From 1ab0e55d218a6a7f282f7a69b783ff873c8ddfac Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Sat, 23 Jul 2022 19:30:20 -0400 Subject: [PATCH 7/7] Clean up Plot.plot by moving some logic to _setup_scales --- seaborn/_core/plot.py | 44 +++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index 277b14dd16..c39f9c5445 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -654,40 +654,37 @@ def save(self, fname, **kwargs) -> Plot: def plot(self, pyplot=False) -> Plotter: """ - Compile the plot and return the :class:`Plotter` engine. - + Compile the plot spec and return a Plotter object. """ # TODO if we have _target object, pyplot should be determined by whether it # is hooked into the pyplot state machine (how do we check?) 
plotter = Plotter(pyplot=pyplot) + # Process the variable assignments and initialize the figure common, layers = plotter._extract_data(self) plotter._setup_figure(self, common, layers) + # Process the scale spec for coordinate variables and transform their data coord_vars = [v for v in self._variables if re.match(r"^x|y", v)] plotter._setup_scales(self, common, layers, coord_vars) + # Apply statistical transform(s) plotter._compute_stats(self, layers) - other_vars = set() # TODO move this into a method - for layer in layers: - if layer["data"].frame.empty and layer["data"].frames: - for df in layer["data"].frames.values(): - other_vars.update(df.columns) - else: - other_vars.update(layer["data"].frame.columns) - other_vars -= set(coord_vars) - plotter._setup_scales(self, common, layers, list(other_vars)) + # Process scale spec for semantic variables and coordinates computed by stat + plotter._setup_scales(self, common, layers) # TODO Remove these after updating other methods # ---- Maybe have debug= param that attaches these when True? plotter._data = common plotter._layers = layers + # Process the data for each layer and add matplotlib artists for layer in layers: plotter._plot_layer(self, layer) + # Add various figure decorations plotter._make_legend(self) plotter._finalize_figure(self) @@ -696,7 +693,6 @@ def plot(self, pyplot=False) -> Plotter: def show(self, **kwargs) -> None: """ Render and display the plot. - """ # TODO make pyplot configurable at the class level, and when not using, # import IPython.display and call on self to populate cell output? @@ -1001,9 +997,23 @@ def _get_subplot_data(self, df, var, view, share_state): return seed_values def _setup_scales( - self, p: Plot, common: PlotData, layers: list[Layer], variables: list[str], + self, p: Plot, + common: PlotData, + layers: list[Layer], + variables: list[str] | None = None, ) -> None: + if variables is None: + # Add variables that have data but not a scale, which happens + # because this method can be called multiple time, to handle + # variables added during the Stat transform. + variables = [] + for layer in layers: + variables.extend(layer["data"].frame.columns) + for df in layer["data"].frames.values(): + variables.extend(v for v in df if v not in variables) + variables = [v for v in variables if v not in self._scales] + for var in variables: # Determine whether this is a coordinate variable @@ -1028,11 +1038,9 @@ def _setup_scales( cols = [var, "col", "row"] parts = [common.frame.filter(cols)] for layer in layers: - if layer["data"].frame.empty and layer["data"].frames: - for df in layer["data"].frames.values(): - parts.append(df.filter(cols)) - else: - parts.append(layer["data"].frame.filter(cols)) + parts.append(layer["data"].frame.filter(cols)) + for df in layer["data"].frames.values(): + parts.append(df.filter(cols)) var_df = pd.concat(parts, ignore_index=True) prop = PROPERTIES[prop_key]
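
For context, the user-facing behavior this series targets can be sketched with the public objects interface. The snippet below is illustrative only and is not part of the patches: it assumes seaborn >= 0.12 with seaborn.objects available, and the DataFrame df is invented for the example. The point is that the y variable here is the bin count produced by the Hist stat rather than a column of the input data, so the scale(y="log") configuration has to be applied to a variable computed by the stat transform, which is the case the reworked _setup_scales handles.

    import numpy as np
    import pandas as pd
    import seaborn.objects as so

    # Hypothetical data, made up for this illustration
    rng = np.random.default_rng(0)
    df = pd.DataFrame({"x": rng.lognormal(sigma=1.5, size=500)})

    # "y" (the count) only exists after the Hist stat runs, so the log scale
    # must be set up for a stat-computed variable.
    p = (
        so.Plot(df, x="x")
        .add(so.Bars(), so.Hist())
        .scale(y="log")
    )
    p.show()

The new test_computed_var_transform test added in patch 1 asserts the same behavior at a lower level, by checking that the y axis transform is logarithmic after plotting a stat-computed variable with scale(y="log").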