Add prototype for unit conversion and scaling
mwaskom committed Jun 3, 2021
1 parent 7e06479 commit 28f9297
Showing 1 changed file with 113 additions and 21 deletions.
134 changes: 113 additions & 21 deletions seaborn/_new_core.py
@@ -119,6 +119,11 @@ def facet(
# TODO do we want to allow this method to be optional and create
# facets if col or row are defined in Plot()? More convenient...

# TODO another option would be to have this signature be like
# facet(dim, order, wrap, share)
# and expect to call it twice for column and row faceting
# (or have facet_col, facet_row)?

# TODO what should this data structure be?
# We can't initialize a FacetGrid here because that will open a figure
orders = {"col": col_order, "row": row_order}
@@ -211,13 +216,16 @@ def _setup_figure(self):
# TODO add external API for parameterizing figure, etc.
# TODO add external API for parameterizing FacetGrid if using
# TODO add external API for passing existing ax (maybe in same method)
# TODO add object that handles the "FacetGrid or single Axes?" abstractions

if not hasattr(self, "_facetspec"):
self.facet() # TODO a good way to activate defaults?

# TODO use context manager with theme that has been set
# TODO (or maybe wrap THIS function with context manager; would be cleaner)

if self._facetspec:

facet_data = pd.DataFrame()
facet_vars = {}
for dim in ["row", "col"]:
@@ -229,13 +237,25 @@ def _setup_figure(self):
facet_vars["col_wrap"] = self._facetspec[dim]["wrap"]
grid = FacetGrid(facet_data, **facet_vars, pyplot=False)
grid.set_titles()

if len(facet_vars) > 2:
zipped = zip(facet_data["row"], facet_data["col"])
facet_keys = pd.Series(zipped, index=facet_data.index)
else:
facet_keys = facet_data.squeeze().astype("category")
axes_map = facet_keys.map(grid.axes_dict)

self._figure = grid.fig
self._facets = grid
self._ax = None
self._facets = grid
self._axes_map = axes_map

else:

self._figure = Figure()
self._facets = None
self._ax = self._figure.add_subplot()
self._facets = None
self._axes_map = None

# TODO good place to do this? (needs to handle FacetGrid)
obj = self._ax if self._facets is None else self._facets
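(For orientation, a standalone sketch, not part of this commit, of what the new axes_map gives us: a Series aligned with the data that records which Axes each row should be drawn on, so downstream steps can route subsets with a single groupby. The axes_dict below stands in for FacetGrid.axes_dict.)

import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame({"x": [1, 2, 3, 4], "col": ["a", "a", "b", "b"]})

fig, (ax_a, ax_b) = plt.subplots(1, 2)
axes_dict = {"a": ax_a, "b": ax_b}      # stands in for FacetGrid.axes_dict

facet_keys = df["col"]                  # single facet dim; tuples when faceting both
axes_map = facet_keys.map(axes_dict)    # Series of Axes, indexed like df

# Each subset is drawn on the Axes it was mapped to
for ax, ax_df in df.groupby(axes_map, sort=False):
    ax.plot(ax_df["x"], marker="o")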
@@ -244,6 +264,9 @@ def _setup_figure(self):
if name is not None:
obj.set(**{f"{axis}label": name})

# TODO in current _attach, we initialize the units at this point
# TODO we will also need to incorporate the scaling that (could) be set

def _setup_mappings(self) -> dict[str, SemanticMapping]: # TODO literal key

all_data = pd.concat([layer.data.frame for layer in self._layers])
@@ -283,56 +306,124 @@ def _plot_layer(self, layer, mappings):
data = layer.data
stat = layer.stat

# TODO where does this method come from?
# data = self.as_numeric(layer.data)
df = self._scale_coords(data.frame)

# TODO how do we handle orientation?
# TODO how can we special-case fast aggregations? (i.e. mean, std, etc.)
# TODO should we pass the grouping variables to the Stat and let it handle that?
stat_grouping_vars = [var for var in grouping_vars if var in data]
if stat.orient not in stat_grouping_vars:
stat_grouping_vars.append(stat.orient)
if stat is not None: # TODO or default to Identity, but we'll have groupby cost
data.frame = (
data.frame
stat_grouping_vars = [var for var in grouping_vars if var in data]
if stat.orient not in stat_grouping_vars:
stat_grouping_vars.append(stat.orient)
df = (
df
.groupby(stat_grouping_vars)
.apply(stat)
# TODO unclear why next step is needed, x/y end up in frame AND index
# .drop(stat_grouping_vars, axis=1, errors="ignore")
# TODO next because of https://github.com/pandas-dev/pandas/issues/34809
.drop(stat_grouping_vars, axis=1, errors="ignore")
.reset_index(stat_grouping_vars)
.reset_index(drop=True) # TODO not always needed, can we limit?
)

# Our statistics happen on the scale we want, but then matplotlib is going
# to re-handle the scaling, so we need to invert before handing off
# Note: we don't need to convert back to strings for categories (but we could?)
# data = self.invert_scale(data)
df = self._unscale_coords(df)

# TODO this might make debugging annoying ... should we create new layer object?
layer.data = data

# Something like this?
# TODO pass in split generator that will be the source of ax
ax = self._ax
facets = self._facets
layer.data.frame = df

# TODO the layer.data somehow needs to pick up variables added in Plot.facet()
splitgen = self._make_splitgen(grouping_vars, layer, mappings, ax, facets)
splitgen = self._make_splitgen(grouping_vars, layer, mappings)

layer.mark._plot(splitgen, mappings)
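(A toy illustration, mine rather than code from this commit, of the comment above about statistics happening on the scaled coordinates: on a log axis the aggregate should be computed in log space and then inverted, otherwise the drawn value is the plain arithmetic mean instead of the one consistent with the axis scale.)

import numpy as np

y = np.array([1.0, 10.0, 100.0])

naive = y.mean()                  # 37.0: arithmetic mean in data space
scaled_mean = np.log10(y).mean()  # 1.0: the stat computed in scaled (log) space
drawn = 10 ** scaled_mean         # 10.0: unscaled again before handing to matplotlib,
                                  # i.e. the geometric mean a log-axis reader expects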

def _scale_coords(self, df):

# TODO any reason to scale the semantics here?
out_df = df.drop(["x", "y"], axis=1, errors="ignore").copy(deep=False)
coord_df = df.filter(regex="[xy]")

with pd.option_context("mode.use_inf_as_null", True):
coord_df = coord_df.dropna()

if self._ax is not None:
self._scale_coords_single(coord_df, out_df, self._ax)
else:
grouped = coord_df.groupby(self._axes_map, sort=False)
for ax, ax_df in grouped:
self._scale_coords_single(ax_df, out_df, ax)

# TODO do we need to handle nas again, e.g. if negative values
# went into a log transform?
# cf GH2454

return out_df

def _scale_coords_single(self, coord_df, out_df, ax):

# TODO modify out_df in place or return and handle externally?

# TODO this looped through "yx" in original core ... why?
# for var in "yx":
# if var not in coord_df:
# continue
for var, col in coord_df.items():

axis = getattr(ax, f"{var}axis")

# TODO should happen upstream, in setup_figure(?), but here for now
# will need to account for order; we don't have that yet
axis.update_units(col)

# TODO subset categories based on whether specified in order
...

scaled = axis.get_transform().transform(axis.convert_units(col))
out_df.loc[col.index, var] = scaled
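(As a reference for the calls used in _scale_coords_single, a minimal matplotlib sketch, not part of the diff, assuming a log-scaled y axis: update_units registers the data with the axis unit machinery, convert_units turns unit-typed data such as dates or categories into floats, and the scale's transform maps those floats into the scaled space where the stats run.)

import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.set_yscale("log")

y = np.array([1.0, 10.0, 100.0])
axis = ax.yaxis
axis.update_units(y)                                 # no-op for plain floats
converted = axis.convert_units(y)                    # still [1, 10, 100] here
scaled = axis.get_transform().transform(converted)   # log10 -> [0., 1., 2.]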

def _unscale_coords(self, df):

out_df = df.drop(["x", "y"], axis=1, errors="ignore").copy(deep=False)
coord_df = df.filter(regex="[xy]")

if self._ax is not None:
self._unscale_coords_single(coord_df, out_df, self._ax)
else:
# TODO the only reason this structure exists in the forward scale func
# is to support unshared categorical axes. I don't think there is any
# situation where numeric axes would have different *transforms*.
# So we should be able to do this in one step in all cases, once
# we are storing information about the scaling centrally.
grouped = coord_df.groupby(self._axes_map, sort=False)
for ax, ax_df in grouped:
self._unscale_coords_single(ax_df, out_df, ax)

return out_df

def _unscale_coords_single(self, coord_df, out_df, ax):

for var, col in coord_df.items():

axis = getattr(ax, f"{var}axis")
inverse_transform = axis.get_transform().inverted()
unscaled = inverse_transform.transform(col)
out_df.loc[col.index, var] = unscaled
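(The companion sketch for _unscale_coords_single, under the same assumptions: inverting the axis transform maps values computed in scaled space back into data units, and matplotlib re-applies the scale itself at draw time.)

import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.set_yscale("log")

scaled = np.array([0.0, 1.0, 2.0])                   # e.g. a stat computed in log space
inverse = ax.yaxis.get_transform().inverted()
unscaled = inverse.transform(scaled)                 # back to [1., 10., 100.]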

def _make_splitgen(
self,
grouping_vars,
layer,
mappings,
ax,
facets,
): # TODO typing

allow_empty = False # TODO

data = layer.data.frame
# TODO join with axes_map to simplify logic below?

ax = self._ax
facets = self._facets

grouping_vars = [var for var in grouping_vars if var in data]
if grouping_vars:
@@ -375,6 +466,7 @@ def splitgen() -> Generator[dict[str, Any], DataFrame, Axes]:

sub_vars = dict(zip(grouping_vars, key))

# TODO can we use axes_map here?
row = sub_vars.get("row", None)
col = sub_vars.get("col", None)
if row is not None and col is not None:
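(The hunk above is truncated, so purely as orientation: a rough, self-contained sketch of the split-generator idea with hypothetical names, not the commit's code, where each yielded chunk carries its subset variables, its data, and the Axes it targets.)

import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame({"x": [1, 2, 3, 4], "col": ["a", "a", "b", "b"]})
fig, (ax_a, ax_b) = plt.subplots(1, 2)
axes_dict = {"a": ax_a, "b": ax_b}

def splitgen():
    # Yield (subset variables, subset data, target Axes) for each group
    for key, part in df.groupby("col", sort=False):
        yield {"col": key}, part, axes_dict[key]

for sub_vars, part, ax in splitgen():
    ax.plot(part["x"], marker="o")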
