Have map_ methods call scale_ method when appropriate
Michael Waskom committed Sep 26, 2021
1 parent a8194b4 commit b527b57
Showing 3 changed files with 58 additions and 47 deletions.
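The gist of the change: the map_* methods no longer keep the order argument on the semantic objects; they forward order (and, for the color semantics, norm) to the corresponding scale_categorical / scale_numeric call. Below is a minimal, self-contained sketch of that delegation pattern, using a hypothetical PlotSketch class and stub scale methods rather than the real Plot implementation:

# Sketch of the delegation pattern this commit introduces (hypothetical names).
# map_color keeps only the palette on the semantic and routes order/norm to
# the scale_* methods.

class PlotSketch:
    def __init__(self):
        self._semantics = {}
        self._scales = {}

    def scale_categorical(self, var, order=None):
        # Record a categorical scale for `var`, fixing its level order.
        self._scales[var] = ("categorical", {"order": order})
        return self

    def scale_numeric(self, var, norm=None):
        # Record a numeric scale for `var`, fixing its normalization range.
        self._scales[var] = ("numeric", {"norm": norm})
        return self

    def map_color(self, palette=None, order=None, norm=None):
        self._semantics["color"] = {"palette": palette}
        if order is not None:
            self.scale_categorical("color", order=order)
        elif norm is not None:
            self.scale_numeric("color", norm=norm)
        return self


p = PlotSketch().map_color(palette="viridis", norm=(0, 10))
print(p._scales["color"])  # ('numeric', {'norm': (0, 10)})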
27 changes: 6 additions & 21 deletions seaborn/_core/mappings.py
@@ -18,7 +18,7 @@
from pandas import Series
from matplotlib.colors import Colormap, Normalize
from matplotlib.scale import Scale
from seaborn._core.typing import PaletteSpec, OrderSpec
from seaborn._core.typing import PaletteSpec

DashPattern = Tuple[float, ...]
DashPatternWithOffset = Tuple[float, Optional[DashPattern]]
@@ -51,13 +51,7 @@ def setup(
) -> LookupMapping:

provided = self._provided
if self.order is not None:
order = self.order
elif scale is not None:
order = scale.order
else:
order = None

order = None if scale is None else scale.order
levels = categorical_order(data, order)

if provided is None:
@@ -145,10 +139,9 @@ class FillSemantic(BinarySemantic):

class ColorSemantic(Semantic):

def __init__(self, palette: PaletteSpec = None, order: OrderSpec = None):
def __init__(self, palette: PaletteSpec = None):

self._palette = palette
self.order = order

def __call__(self, x): # TODO types; will need to overload

@@ -190,13 +183,7 @@ def setup(

# TODO allow configuration of norm in mapping methods like we do with order?
norm = None if scale is None else scale.norm

if self.order is not None:
order = self.order
elif scale is not None:
order = scale.order
else:
order = None
order = None if scale is None else scale.order

# TODO We need to add some input checks ...
# e.g. specifying a numeric scale and a qualitative colormap should fail nicely.
@@ -297,7 +284,7 @@ class MarkerSemantic(DiscreteSemantic):
_semantic = "marker"

# TODO full types
def __init__(self, shapes: list | dict | None = None, order: OrderSpec = None):
def __init__(self, shapes: list | dict | None = None):

# TODO fill or filled parameter?
# allow singletons? e.g. map_marker(shapes="o", filled=[True, False])?
@@ -309,7 +296,6 @@ def __init__(self, shapes: list | dict | None = None, order: OrderSpec = None):
shapes = {k: MarkerStyle(v) for k, v in shapes.items()}

self._provided = shapes
self.order = order

def _default_values(self, n): # TODO or have this as an infinite generator?
"""Build an arbitrarily long list of unique marker styles for points.
@@ -361,7 +347,7 @@ class DashSemantic(DiscreteSemantic):

_semantic = "dash pattern"

def __init__(self, styles: list | dict | None = None, order: OrderSpec = None): # TODO full types
def __init__(self, styles: list | dict | None = None): # TODO full types

# TODO fill or filled parameter?
# allow singletons? e.g. map_marker(shapes="o", filled=[True, False])?
@@ -373,7 +359,6 @@ def __init__(self, styles: list | dict | None = None, order: OrderSpec = None):
styles = {k: self._get_dash_pattern(v) for k, v in styles.items()}

self._provided = styles
self.order = order

def _default_values(self, n: int) -> list[DashPatternWithOffset]:
"""Build an arbitrarily long list of unique dash styles for lines.
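With that delegation in place, the setup() methods in mappings.py above read the level order from the scale alone. A rough sketch of the resulting lookup, using a stub scale and a simplified stand-in for categorical_order (the real helpers carry more state and validation):

# Simplified order resolution mirroring the new setup() one-liner.

def categorical_order(values, order=None):
    # Stand-in: honor an explicit order if given, else sort the unique values.
    if order is not None:
        return list(order)
    return sorted(set(values))


class ScaleStub:
    def __init__(self, order=None, norm=None):
        self.order = order
        self.norm = norm


def setup_levels(data, scale=None):
    # Mirrors: order = None if scale is None else scale.order
    order = None if scale is None else scale.order
    return categorical_order(data, order)


print(setup_levels(["b", "a", "c"]))  # ['a', 'b', 'c']
print(setup_levels(["b", "a", "c"], ScaleStub(order=["c", "b", "a"])))  # ['c', 'b', 'a']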
73 changes: 49 additions & 24 deletions seaborn/_core/plot.py
@@ -34,11 +34,16 @@
from matplotlib.axes import Axes
from matplotlib.figure import Figure, SubFigure
from matplotlib.scale import ScaleBase
from matplotlib.colors import Normalize
from seaborn._core.mappings import SemanticMapping
from seaborn._marks.base import Mark
from seaborn._stats.base import Stat
from seaborn._core.typing import DataSource, PaletteSpec, VariableSpec, OrderSpec
from seaborn._core.typing import (
DataSource,
PaletteSpec,
VariableSpec,
OrderSpec,
NormSpec,
)


class Plot:
@@ -200,6 +205,10 @@ def pair(
for axis in "xy":
keys = []
for i, col in enumerate(pairspec.get(axis, [])):
# TODO note that this assumes no variables are defined as {axis}{digit}
# This could be a slight problem as matplotlib occasionally uses that
# format for artists that take multiple parameters on each axis.
# Perhaps we should set the internal pair variables to "_{axis}{index}"?
key = f"{axis}{i}"
keys.append(key)
pairspec["variables"][key] = col
@@ -252,32 +261,47 @@ def map_color(
# TODO accept variable specification here?
palette: PaletteSpec = None,
order: OrderSpec = None,
norm: NormSpec = None,
) -> Plot:

# TODO we do some fancy business currently to avoid having to
# write these ... do we want that to persist or is it too confusing?
# If we do ... maybe we don't even need to write these methods, but can
# instead programatically add them based on central dict of mapping objects.
# ALSO TODO should these be initialized with defaults?
self._semantics["color"] = ColorSemantic(palette, order)
self._semantics["color"] = ColorSemantic(palette)
if order is not None:
self.scale_categorical("color", order=order)
elif norm is not None:
self.scale_numeric("color", norm=norm)
return self

def map_facecolor(
self,
palette: PaletteSpec = None,
order: OrderSpec = None,
norm: NormSpec = None,
) -> Plot:

self._semantics["facecolor"] = ColorSemantic(palette, order)
self._semantics["facecolor"] = ColorSemantic(palette)
if order is not None:
self.scale_categorical("facecolor", order=order)
elif norm is not None:
self.scale_numeric("facecolor", norm=norm)
return self

def map_edgecolor(
self,
palette: PaletteSpec = None,
order: OrderSpec = None,
norm: NormSpec = None,
) -> Plot:

self._semantics["edgecolor"] = ColorSemantic(palette, order)
self._semantics["edgecolor"] = ColorSemantic(palette)
if order is not None:
self.scale_categorical("edgecolor", order=order)
elif norm is not None:
self.scale_numeric("edgecolor", norm=norm)
return self

def map_marker(
@@ -286,7 +310,9 @@ def map_marker(
order: OrderSpec = None,
) -> Plot:

self._semantics["marker"] = MarkerSemantic(shapes, order)
self._semantics["marker"] = MarkerSemantic(shapes)
if order is not None:
self.scale_categorical("marker", order=order)
return self

def map_dash(
@@ -295,7 +321,9 @@ def map_dash(
order: OrderSpec = None,
) -> Plot:

self._semantics["dash"] = DashSemantic(styles, order)
self._semantics["dash"] = DashSemantic(styles)
if order is not None:
self.scale_categorical("dash", order=order)
return self

# TODO have map_gradient?
@@ -311,7 +339,7 @@ def scale_numeric(
self,
var: str,
scale: str | ScaleBase = "linear",
norm: tuple[float | None, float | None] | Normalize | None = None,
norm: NormSpec = None,
**kwargs
) -> Plot:

@@ -721,22 +749,17 @@ def _scale_coords(
df: DataFrame,
) -> DataFrame:

# TODO note that this assumes no variables are defined as {axis}{digit}
# This could be a slight problem as matplotlib occasionally uses that
# format for artists that take multiple parameters on each axis.
# Perhaps we should set the internal pair variables to "_{axis}{index}"?
coord_cols = [c for c in df if re.match(r"^[xy]\D*$", c)]
drop_cols = [c for c in df if re.match(r"^[xy]\d", c)]

out_df = (
df
.copy(deep=False)
.drop(coord_cols + drop_cols, axis=1)
.drop(coord_cols, axis=1)
.reindex(df.columns, axis=1) # So unscaled columns retain their place
)

for subplot in subplots:
axes_df = self._get_subplot_data(df, subplot)[coord_cols]
axes_df = self._filter_subplot_data(df, subplot)[coord_cols]
with pd.option_context("mode.use_inf_as_null", True):
axes_df = axes_df.dropna()
self._scale_coords_single(axes_df, out_df, scales, subplot["ax"])
@@ -752,18 +775,18 @@ def _scale_coords_single(
) -> None:

# TODO modify out_df in place or return and handle externally?
for var, values in coord_df.items():
for axis, values in coord_df.items():

# TODO Explain the logic of this method thoroughly
# It is clever, but a bit confusing!

axis = var[0]
m = re.match(r"^([xy]\d*).*$", var) # TODO no longer necessary given _generate_pairings?
prefix = m.group(1)

scale = scales[axis]
axis_obj = getattr(ax, f"{axis}axis")

# TODO this is no longer valid with the way the semantic order overrides
# Perhaps better to have the scale always be the source of the order info
# but have a step where the order specified in the mapping overrides it?
# Alternately, use self._orderings here?
if scale.order is not None:
values = values[values.isin(scale.order)]

@@ -772,8 +795,10 @@
values = scale.cast(values)
axis_obj.update_units(categorical_order(values))

scaled = scale.forward(axis_obj.convert_units(values))
out_df.loc[values.index, var] = scaled
# TODO it seems wrong that we need to cast to float here,
# but convert_units sometimes outputs an object array (e.g. w/Int64 values)
scaled = scale.forward(axis_obj.convert_units(values).astype(float))
out_df.loc[values.index, axis] = scaled

def _unscale_coords(
self,
@@ -836,7 +861,7 @@ def _generate_pairings(

yield subplots, scales, df.assign(**reassignments)

def _get_subplot_data( # TODO FIXME:names maybe _filter_subplot_data?
def _filter_subplot_data( # TODO FIXME:names maybe _filter_subplot_data?
self,
df: DataFrame,
subplot: dict,
@@ -868,7 +893,7 @@ def generate_splits() -> Generator:

for subplot in subplots:

axes_df = self._get_subplot_data(df, subplot)
axes_df = self._filter_subplot_data(df, subplot)

subplot_keys = {}
for dim in ["col", "row"]:
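A side note on _scale_coords above: coordinate columns are selected with a regex that matches an axis letter followed by no digits, while a companion pattern (used for drop_cols in the pre-change version) matches the paired x0/x1-style names. A small illustration of what those two patterns match, using made-up column names:

import re

columns = ["x", "y", "ymin", "x0", "x1", "color"]

# An axis letter followed by non-digits only: plain coordinate columns.
coord_cols = [c for c in columns if re.match(r"^[xy]\D*$", c)]

# An axis letter followed immediately by a digit: paired coordinate columns.
pair_cols = [c for c in columns if re.match(r"^[xy]\d", c)]

print(coord_cols)  # ['x', 'y', 'ymin']
print(pair_cols)   # ['x0', 'x1']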
5 changes: 3 additions & 2 deletions seaborn/_core/typing.py
@@ -2,16 +2,17 @@
from typing import TYPE_CHECKING
if TYPE_CHECKING:

from typing import Literal, Union
from typing import Literal, Optional, Union
from collections.abc import Mapping, Hashable, Iterable
from numpy.typing import ArrayLike
from pandas import DataFrame, Series, Index
from matplotlib.colors import Colormap
from matplotlib.colors import Colormap, Normalize

Vector = Union[Series, Index, ArrayLike]
PaletteSpec = Union[str, list, dict, Colormap, None]
VariableSpec = Union[Hashable, Vector, None]
OrderSpec = Union[Series, Index, Iterable, None] # TODO technically str is iterable
NormSpec = Union[tuple[Optional[float], Optional[float]], Normalize, None]
# TODO can we better unify the VarType object and the VariableType alias?
VariableType = Literal["numeric", "categorical", "datetime", "unknown"]
DataSource = Union[DataFrame, Mapping[Hashable, Vector], None]
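The new NormSpec alias accepts a (vmin, vmax) tuple (either bound may be None), a matplotlib Normalize instance, or None. A hedged sketch of how such a value could be coerced to a Normalize object; the as_normalize helper is illustrative only and not part of this diff:

from matplotlib.colors import Normalize


def as_normalize(norm):
    # Coerce a NormSpec-style value to a Normalize instance.
    if norm is None:
        return Normalize()                  # autoscale from the data later
    if isinstance(norm, Normalize):
        return norm                         # pass an existing object through
    vmin, vmax = norm                       # (vmin, vmax); either side may be None
    return Normalize(vmin=vmin, vmax=vmax)


print(as_normalize((0, 10)).vmax)  # 10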
