Skip to content

Commit

Permalink
Add categorical coordinate variables
Browse files Browse the repository at this point in the history
  • Loading branch information
mwaskom committed Jun 6, 2021
1 parent 8661b96 commit 806f16c
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 37 deletions.
23 changes: 10 additions & 13 deletions seaborn/_core/mappings.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

from collections import abc

import numpy as np
import pandas as pd
import matplotlib as mpl
Expand All @@ -13,12 +15,15 @@
from typing import Optional, Literal
from pandas import Series
from matplotlib.colors import Colormap, Normalize
from matplotlib.scale import Scale # TODO or our own ScaleWrapper
from .typing import PaletteSpec


class SemanticMapping:
"""Base class for mappings between data and visual attributes."""
def setup(self, data: Series) -> SemanticMapping:

def setup(self, data: Series, scale: Optional[Scale]) -> SemanticMapping:
# TODO why not just implement the GroupMapping setup() here?
raise NotImplementedError()

def __call__(self, x): # TODO types; will need to overload (wheee)
Expand Down Expand Up @@ -54,7 +59,7 @@ def __call__(self, x): # TODO types; will need to overload (wheee)

class GroupMapping(SemanticMapping):
"""Mapping that does not alter any visual properties of the artists."""
def setup(self, data: Series) -> GroupMapping:
def setup(self, data: Series, scale: Optional[Scale]) -> GroupMapping:
self.levels = categorical_order(data)
return self

Expand All @@ -78,20 +83,15 @@ def __init__(
def setup(
self,
data: Series, # TODO generally rename Series arguments to distinguish from DF?
scale: Optional[Scale],
) -> HueMapping:
"""Infer the type of mapping to use and define it using this vector of data."""
palette: Optional[PaletteSpec] = self._input_palette
order: Optional[list] = self._input_order
norm: Optional[Normalize] = self._input_norm
cmap: Optional[Colormap] = None

# TODO We are not going to have the concept of wide-form data within PlotData
# but we will still support it. I think seaborn functions that accept wide-form
# data can explicitly set the hue mapping to be categorical.
# Then we can drop this.
input_format: Literal["long", "wide"] = "long"

map_type = self._infer_map_type(data, palette, norm, input_format)
map_type = self._infer_map_type(data, palette, norm)

# Our goal is to end up with a dictionary mapping every unique
# value in `data` to a color. We will also keep track of the
Expand Down Expand Up @@ -147,17 +147,14 @@ def _infer_map_type(
data: Series,
palette: Optional[PaletteSpec],
norm: Optional[Normalize],
input_format: Literal["long", "wide"],
) -> Optional[Literal["numeric", "categorical", "datetime"]]:
"""Determine how to implement the mapping."""
map_type: Optional[Literal["numeric", "categorical", "datetime"]]
if palette in QUAL_PALETTES:
map_type = "categorical"
elif norm is not None:
map_type = "numeric"
elif isinstance(palette, (dict, list)): # TODO mapping/sequence?
map_type = "categorical"
elif input_format == "wide":
elif isinstance(palette, (abc.Mapping, abc.Sequence)):
map_type = "categorical"
else:
map_type = variable_type(data)
Expand Down
52 changes: 32 additions & 20 deletions seaborn/_core/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .rules import categorical_order
from .data import PlotData
from .mappings import GroupMapping, HueMapping
from .scales import ScaleWrapper, CategoricalScale

from typing import TYPE_CHECKING
if TYPE_CHECKING:
Expand Down Expand Up @@ -50,9 +51,10 @@ def __init__(
"hue": HueMapping(),
}

# TODO we need to default to some sort of agnostic type
self._scales = {
"x": mpl.scale.LinearScale("x"),
"y": mpl.scale.LinearScale("y"),
"x": ScaleWrapper(mpl.scale.LinearScale("x"), "numeric"),
"y": ScaleWrapper(mpl.scale.LinearScale("y"), "numeric"),
}

def on(self) -> Plot:
Expand Down Expand Up @@ -97,6 +99,7 @@ def facet(
row_order: Optional[Vector] = None,
col_wrap: Optional[int] = None,
data: Optional[DataSource] = None,
**grid_kwargs,
# TODO what other parameters? sharex/y?
) -> Plot:

Expand Down Expand Up @@ -136,6 +139,8 @@ def facet(
if "col" in facetspec:
facetspec["col"]["wrap"] = col_wrap

facetspec["grid_kwargs"] = grid_kwargs

self._facetspec = facetspec
self._facetdata = data # TODO messy, but needed if variables are added here

Expand All @@ -154,11 +159,18 @@ def map_hue(
self._mappings["hue"] = HueMapping(palette, order, norm)
return self

def scale_numeric(self, axis, scale="linear", **kwargs) -> Plot:
def scale_numeric(self, var, scale="linear", **kwargs) -> Plot:

scale = mpl.scale.scale_factory(scale, var, **kwargs)
self._scales[var] = ScaleWrapper(scale, "numeric")
return self

scale = mpl.scale.scale_factory(scale, axis, **kwargs)
self._scales[axis] = scale
def scale_categorical(self, var, order=None, formatter=None) -> Plot:

# TODO how to set margins "nicely"?

scale = CategoricalScale(var, order, formatter)
self._scales[var] = ScaleWrapper(scale, "categorical")
return self

def theme(self) -> Plot:
Expand Down Expand Up @@ -223,7 +235,8 @@ def _setup_figure(self):
facet_vars[dim] = name
if dim == "col":
facet_vars["col_wrap"] = self._facetspec[dim]["wrap"]
grid = FacetGrid(facet_data, **facet_vars, pyplot=False)
kwargs = self._facetspec["grid_kwargs"]
grid = FacetGrid(facet_data, **facet_vars, pyplot=False, **kwargs)
grid.set_titles()

self._figure = grid.fig
Expand All @@ -238,8 +251,8 @@ def _setup_figure(self):

axes_list = list(self._facets.axes.flat) if self._ax is None else [self._ax]
for ax in axes_list:
ax.set_xscale(self._scales["x"])
ax.set_yscale(self._scales["y"])
ax.set_xscale(self._scales["x"]._scale)
ax.set_yscale(self._scales["y"]._scale)

# TODO good place to do this? (needs to handle FacetGrid)
obj = self._ax if self._facets is None else self._facets
Expand All @@ -262,7 +275,8 @@ def _setup_mappings(self) -> dict[str, SemanticMapping]:
all_data = pd.concat(
[layer.data.frame.get(var, None) for layer in layers]
).reset_index(drop=True)
mappings[var] = mapping.setup(all_data)
scale = self._scales.get(var, None)
mappings[var] = mapping.setup(all_data, scale)

return mappings

Expand Down Expand Up @@ -359,21 +373,20 @@ def _scale_coords_single(
# for var in "yx":
# if var not in coord_df:
# continue
for var, col in coord_df.items():
for var, data in coord_df.items():

axis = var[0]
axis_obj = getattr(ax, f"{axis}axis")
scale = self._scales[axis]

# TODO should happen upstream, in setup_figure(?), but here for now
# will need to account for order; we don't have that yet
axis_obj.update_units(col)
if scale.order is not None:
data = data[data.isin(scale.order)]

# TODO subset categories based on whether specified in order
...
data = scale.cast(data)
axis_obj.update_units(categorical_order(data))

transform = self._scales[axis].get_transform().transform
scaled = transform(axis_obj.convert_units(col))
out_df.loc[col.index, var] = scaled
scaled = self._scales[axis].forward(axis_obj.convert_units(data))
out_df.loc[data.index, var] = scaled

def _unscale_coords(self, df: DataFrame) -> DataFrame:

Expand All @@ -382,8 +395,7 @@ def _unscale_coords(self, df: DataFrame) -> DataFrame:

for var, col in coord_df.items():
axis = var[0]
invert_scale = self._scales[axis].get_transform().inverted().transform
out_df[var] = invert_scale(coord_df[var])
out_df[var] = self._scales[axis].reverse(coord_df[var])

return out_df

Expand Down
62 changes: 62 additions & 0 deletions seaborn/_core/scales.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from __future__ import annotations

import pandas as pd
from matplotlib.scale import LinearScale

from .rules import categorical_order

from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Optional
from collections.abc import Sequence
from matplotlib.scale import Scale
from .typing import VariableType


class ScaleWrapper:
    """Bundle a scale object with its transform callables and a variable type.

    The wrapped scale must provide ``get_transform()``; the transform it
    returns must provide ``transform`` and ``inverted()``. The scale may
    optionally provide ``order`` and ``cast``, which are surfaced through
    the property and method of the same names on this wrapper.
    """

    def __init__(self, scale: Scale, type: VariableType):
        """Store the scale and type, and cache both transform directions.

        Parameters
        ----------
        scale
            Scale object to wrap.
        type
            Label describing the kind of variable the scale applies to.
        """
        self._scale = scale
        self.type = type

        # Resolve the transform once so callers can use the bound callables
        # directly instead of going back through the scale every time.
        forward_transform = scale.get_transform()
        inverse_transform = forward_transform.inverted()
        self.forward = forward_transform.transform
        self.reverse = inverse_transform.transform

    @property
    def order(self):
        """Return the wrapped scale's ``order``, or None if it has none."""
        return self._scale.order if hasattr(self._scale, "order") else None

    def cast(self, data):
        """Return ``data`` passed through the wrapped scale's ``cast`` method.

        When the wrapped scale does not define ``cast``, ``data`` is
        returned unchanged.
        """
        if not hasattr(self._scale, "cast"):
            return data
        return self._scale.cast(data)


class CategoricalScale(LinearScale):
    """Linear scale that also records a category order and a formatter.

    ``cast`` uses the stored ``order`` and ``formatter`` to convert raw
    values into a categorical pandas Series.
    """

    def __init__(self, axis: str, order: Optional[Sequence], formatter: Optional):
        """Initialize the linear scale and remember ``order`` / ``formatter``.

        Parameters
        ----------
        axis
            Forwarded to the ``LinearScale`` initializer.
        order
            Category levels to use, or None to derive them from the data
            at cast time.
        formatter
            Callable applied to every level and value, or None to convert
            them with ``str``.
        """
        # TODO what type is formatter?
        super().__init__(axis)
        self.order = order
        self.formatter = formatter

    def cast(self, data):
        """Convert ``data`` into a Series holding categorical values.

        The category levels come from ``categorical_order(data, self.order)``.
        Levels and values are both passed through ``self.formatter`` when one
        is set, and converted to ``str`` otherwise. The categorical is marked
        as ordered only when an explicit ``order`` was given. The index of
        the input Series is carried over to the result.
        """
        values = pd.Series(data)
        levels = pd.Index(categorical_order(values, self.order))

        # Levels and values must go through the same conversion so that the
        # converted values still match the converted levels.
        if self.formatter is not None:
            levels = levels.map(self.formatter)
            values = values.map(self.formatter)
        else:
            levels = levels.astype(str)
            values = values.astype(str)

        has_explicit_order = self.order is not None
        categorical = pd.Categorical(
            values, categories=levels, ordered=has_explicit_order
        )
        return pd.Series(categorical, index=values.index)
6 changes: 2 additions & 4 deletions seaborn/_core/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,14 @@
from typing import TYPE_CHECKING
if TYPE_CHECKING:

from typing import Optional, Union
from typing import Optional, Union, Literal
from collections.abc import Mapping, Hashable
from numpy.typing import ArrayLike
from pandas import DataFrame, Series, Index
from matplotlib.colors import Colormap

Vector = Union[Series, Index, ArrayLike]

PaletteSpec = Optional[Union[str, list, dict, Colormap]]

VariableSpec = Union[Hashable, Vector]

VariableType = Literal["numeric", "categorical", "datetime"]
DataSource = Union[DataFrame, Mapping[Hashable, Vector]]

0 comments on commit 806f16c

Please sign in to comment.