Clear mypy failures

mwaskom · Sep 28, 2021 · cc8f73a · cc8f73a
1 parent 8282870
commit cc8f73a
Show file tree

Hide file tree

Showing 4 changed files with 74 additions and 122 deletions.
diff --git a/seaborn/_core/mappings.py b/seaborn/_core/mappings.py
@@ -2,7 +2,6 @@
 import itertools
 import warnings
 
-import numpy as np
 import pandas as pd
 import matplotlib as mpl
 from matplotlib.colors import to_rgb
@@ -24,14 +23,17 @@
     DashPatternWithOffset = Tuple[float, Optional[DashPattern]]
 
 
-# TODO I think we want map_semantic to accept _order/_norm parameters.
-# But that forces some decisions:
-# - Which takes precedence? (i.e. .map_ vs .scale_)?
-# - Does Plot.map_ internally call self.scale_ or hand off to the Semantic?
+class Semantic:
 
+    _semantic: str  # TODO or name?
 
-class Semantic:
-    ...
+    def setup(
+        self,
+        data: Series,  # TODO generally rename Series arguments to distinguish from DF?
+        scale: Scale | None = None,  # TODO or always have a Scale?
+    ) -> SemanticMapping:
+
+        raise NotImplementedError()
 
 
 class BinarySemantic(Semantic):
@@ -40,6 +42,8 @@ class BinarySemantic(Semantic):
 
 class DiscreteSemantic(Semantic):
 
+    _provided: list | dict | None
+
     def _default_values(self, n: int) -> list:
         """Return n unique values."""
         raise NotImplementedError
@@ -107,7 +111,7 @@ def setup(
         self,
         data: Series,  # TODO generally rename Series arguments to distinguish from DF?
         scale: Scale | None = None,  # TODO or always have a Scale?
-    ) -> NormedMapping:
+    ):  # TODO return type
 
         values = self._values
         # norm = None if scale is None else scale.norm
@@ -133,53 +137,20 @@ def _setup_categorical(data, values, order):
 # ==================================================================================== #
 
 
-class FillSemantic(BinarySemantic):
-    ...
-
-
 class ColorSemantic(Semantic):
 
     def __init__(self, palette: PaletteSpec = None):
 
         self._palette = palette
 
-    def __call__(self, x):  # TODO types; will need to overload
-
-        # TODO we are missing numeric maps and lots of other things
-        if isinstance(x, pd.Series):
-            if x.dtype.name == "category":  # TODO! possible pandas bug
-                x = x.astype(object)
-            # TODO where is best place to ensure that LUT values are rgba tuples?
-            return np.stack(x.map(self.dictionary).map(to_rgb))
-        else:
-            return to_rgb(self.dictionary[x])
-
-    def _infer_map_type(
-        self,
-        scale: Scale,
-        palette: PaletteSpec,
-        data: Series,
-    ) -> VarType:
-        """Determine how to implement the mapping."""
-        map_type: VarType
-        if scale is not None:
-            return scale.type
-        elif palette in QUAL_PALETTES:
-            map_type = VarType("categorical")
-        elif isinstance(palette, (dict, list)):
-            map_type = VarType("categorical")
-        else:
-            map_type = variable_type(data, boolean_type="categorical")
-        return map_type
-
     def setup(
         self,
         data: Series,  # TODO generally rename Series arguments to distinguish from DF?
         scale: Scale | None = None,  # TODO or always have a Scale?
     ) -> LookupMapping | NormedMapping:
         """Infer the type of mapping to use and define it using this vector of data."""
+        mapping: LookupMapping | NormedMapping
         palette: PaletteSpec = self._palette
-        cmap: Colormap | None = None
 
         # TODO allow configuration of norm in mapping methods like we do with order?
         norm = None if scale is None else scale.norm
@@ -192,48 +163,30 @@ def setup(
 
         if map_type == "categorical":
 
-            mapping = self._setup_categorical(data, palette, order)
-            return LookupMapping(mapping)
+            mapping = LookupMapping(self._setup_categorical(data, palette, order))
 
         elif map_type == "numeric":
 
             data = pd.to_numeric(data)
-            mapping, norm, transform = self._setup_numeric(data, palette, norm)
-            if mapping is not None:
+            lookup, norm, transform = self._setup_numeric(data, palette, norm)
+            if lookup:
                 # TODO See comments in _setup_numeric about deprecation of this
-                return LookupMapping(mapping)
-            return NormedMapping(norm, transform)
-
-        # --- Option 3: datetime mapping
+                mapping = LookupMapping(lookup)
+            else:
+                mapping = NormedMapping(norm, transform)
 
         elif map_type == "datetime":
             # TODO this needs actual implementation
-            cmap = norm = None
-            levels, dictionary = self._setup_categorical(
-                # Casting data to list to handle differences in the way
-                # pandas and numpy represent datetime64 data
-                list(data), palette, order,
-            )
-
-        # TODO do we need to return and assign out here or can the
-        # type-specific methods do the assignment internally
-
-        # TODO I don't love how this is kind of a mish-mash of attributes
-        # Can we be more consistent across SemanticMapping subclasses?
-        self.dictionary = dictionary
-        self.palette = palette
-        self.levels = levels
-        self.norm = norm
-        self.cmap = cmap
+            mapping = LookupMapping(self._setup_categorical(data, palette, order))
 
-        return self
+        return mapping
 
     def _setup_categorical(
         self,
         data: Series,
         palette: PaletteSpec,
         order: list | None,
-    ) -> tuple[list, dict]:
+    ) -> dict[Any, tuple[float, float, float]]:
         """Determine colors when the mapping is categorical."""
         # -- Identify the order and name of the levels
 
@@ -278,7 +231,7 @@ def _setup_numeric(
         data: Series,
         palette: PaletteSpec,
         norm: Normalize | None,
-    ) -> tuple[list, dict, Normalize | None, Colormap]:
+    ) -> tuple[dict, Normalize, Callable]:
         """Determine colors when the variable is quantitative."""
         cmap: Colormap
         if isinstance(palette, dict):
@@ -317,7 +270,7 @@ def _setup_numeric(
                 err = "`norm` must be None, tuple, or Normalize object."
                 raise ValueError(err)
             norm.autoscale_None(data.dropna())
-            mapping = None
+            mapping = {}
 
         def rgb_transform(x):
             rgba = cmap(x)
@@ -328,6 +281,28 @@ def rgb_transform(x):
 
         return mapping, norm, rgb_transform
 
+    def _infer_map_type(
+        self,
+        scale: Scale,
+        palette: PaletteSpec,
+        data: Series,
+    ) -> VarType:
+        """Determine how to implement the mapping."""
+        map_type: VarType
+        if scale is not None:
+            return scale.type
+        elif palette in QUAL_PALETTES:
+            map_type = VarType("categorical")
+        elif isinstance(palette, (dict, list)):
+            map_type = VarType("categorical")
+        else:
+            map_type = variable_type(data, boolean_type="categorical")
+        return map_type
+
+
+class FillSemantic(BinarySemantic):
+    ...
+
 
 class MarkerSemantic(DiscreteSemantic):
 
@@ -527,7 +502,7 @@ def __call__(self, x: Any) -> Any:  # Possibly to type output based on lookup_ta
 
 class NormedMapping(SemanticMapping):
 
-    def __init__(self, norm: Normalize, transform: Callable[float, Any]):
+    def __init__(self, norm: Normalize, transform: Callable):
 
         self.norm = norm
         self.transform = transform
@@ -539,37 +514,3 @@ def __call__(self, x: Any) -> Any:
         # TODO note that matplotlib Normalize is going to return a masked array
         # maybe this is fine since we're handing the output off to matplotlib?
         return self.transform(self.norm(x))
-
-# ==================================================================================== #
-
-
-class SemanticMapping:
-    """Base class for mappings between data and visual attributes."""
-
-    def setup(self, data: Series, scale: Scale | None) -> SemanticMapping:
-        # TODO why not just implement the GroupMapping setup() here?
-        raise NotImplementedError()
-
-    def __call__(self, x):  # TODO types; will need to overload (wheee)
-        # TODO this is a hack to get things working
-        if isinstance(x, pd.Series):
-            if x.dtype.name == "category":  # TODO! possible pandas bug
-                x = x.astype(object)
-            # TODO where is best place to ensure that LUT values are rgba tuples?
-            # TODO may need to move below line to ColorMapping
-            # return np.stack(x.map(self.dictionary))
-            return x.map(self.dictionary)
-        else:
-            return self.dictionary[x]
-
-
-# TODO Currently, the SemanticMapping objects are also the source of the information
-# about the levels/order of the semantic variables. Do we want to decouple that?
-
-# TODO Perhaps the setup method should not add attributes and return self, but rather
-# return an object that is initialized to do the mapping. This would make it so that
-# Plotter._setup_mappings() won't mutate attributes on the Plot that generated it.
-# Think about this a bit more but I think it's the way forward. Decrease state!
-# Also if __init__ is just going to store information, can we abstract that in
-# a nice way while also having a method with a signature/docstring we can use to
-# attach map_{semantic} methods to Plot?
diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py
@@ -10,7 +10,7 @@
 import matplotlib as mpl
 import matplotlib.pyplot as plt  # TODO defer import into Plot.show()
 
-from seaborn._core.rules import categorical_order, variable_type
+from seaborn._core.rules import variable_type, categorical_order
 from seaborn._core.data import PlotData
 from seaborn._core.subplots import Subplots
 from seaborn._core.mappings import (
@@ -33,7 +33,7 @@
     from matplotlib.axes import Axes
     from matplotlib.figure import Figure, SubFigure
     from matplotlib.scale import ScaleBase
-    from seaborn._core.mappings import SemanticMapping
+    from seaborn._core.mappings import Semantic, SemanticMapping
     from seaborn._marks.base import Mark
     from seaborn._stats.base import Stat
     from seaborn._core.typing import (
@@ -50,8 +50,9 @@ class Plot:
     _data: PlotData
     _layers: list[Layer]
     # TODO -> _semantics, have _mappings hold the objects returned from Semantic.setup?
+    _semantics: dict[str, Semantic]
     _mappings: dict[str, SemanticMapping]  # TODO keys as Literal, or use TypedDict?
-    _scales: dict[str, ScaleBase]
+    _scales: dict[str, ScaleWrapper]
 
     # TODO use TypedDict here
     _subplotspec: dict[str, Any]
@@ -121,6 +122,9 @@ def add(
         **variables: VariableSpec,
     ) -> Plot:
 
+        # TODO FIXME:layer change the layer object to a simple dictionary,
+        # there's almost no logic in the class and it will make copy/update less awkward
+
         # TODO do a check here that mark has been initialized,
         # otherwise errors will be inscrutable
 
@@ -803,13 +807,17 @@ def _unscale_coords(
     def _generate_pairings(
         self,
         df: DataFrame
-    ) -> Generator[tuple[list[dict], dict[str, ScaleWrapper], DataFrame], None, None]:
+    ) -> Generator[
+        tuple[list[dict], dict[str, ScaleWrapper], DataFrame], None, None
+    ]:
         # TODO retype return with SubplotSpec or similar
 
         pair_variables = self._pairspec.get("structure", {})
 
         if not pair_variables:
-            yield self._subplots, self._scales, df
+            # TODO casting to list because subplots below is a list
+            # Maybe a cleaner way to do this?
+            yield list(self._subplots), self._scales, df
             return
 
         iter_axes = itertools.product(*[
@@ -823,10 +831,11 @@ def _generate_pairings(
                 if (x is None or sub["x"] == x) and (y is None or sub["y"] == y):
                     subplots.append(sub)
 
-            scales = {
-                "x": self._scales.get("x" if x is None else x),
-                "y": self._scales.get("y" if y is None else y),
-            }
+            scales = {}
+            for axis, prefix in zip("xy", [x, y]):
+                key = axis if prefix is None else prefix
+                if key in self._scales:
+                    scales[axis] = self._scales[key]
 
             reassignments = {}
             for axis, prefix in zip("xy", [x, y]):
@@ -947,14 +956,14 @@ def _repr_png_(self) -> bytes:
 
 class Layer:
 
-    data = PlotData
+    data: PlotData
 
     def __init__(
         self,
         mark: Mark,
         stat: Stat | None,
         source: DataSource | None,
-        variables: VariableSpec | None,
+        variables: dict[str, VariableSpec],
     ):
 
         self.mark = mark
@@ -963,4 +972,6 @@ def __init__(
         self.variables = variables
 
     def __contains__(self, key: str) -> bool:
-        return key in self.data
+        if hasattr(self, "data"):
+            return key in self.data
+        return False
diff --git a/seaborn/_core/scales.py b/seaborn/_core/scales.py
@@ -23,7 +23,7 @@ class ScaleWrapper:
     def __init__(
         self,
         scale: ScaleBase,
-        type: VariableType,  # TODO don't use builtin name?
+        type: VarType | VariableType,  # TODO don't use builtin name?
         norm: tuple[float | None, float | None] | Normalize | None = None,
     ):
 

diff --git a/seaborn/_core/typing.py b/seaborn/_core/typing.py
@@ -2,7 +2,7 @@
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
 
-    from typing import Literal, Optional, Union
+    from typing import Literal, Optional, Union, Tuple
     from collections.abc import Mapping, Hashable, Iterable
     from numpy.typing import ArrayLike
     from pandas import DataFrame, Series, Index
@@ -12,7 +12,7 @@
     PaletteSpec = Union[str, list, dict, Colormap, None]
     VariableSpec = Union[Hashable, Vector, None]
     OrderSpec = Union[Series, Index, Iterable, None]  # TODO technically str is iterable
-    NormSpec = Union[tuple[Optional[float], Optional[float]], Normalize, None]
+    NormSpec = Union[Tuple[Optional[float], Optional[float]], Normalize, None]
     # TODO can we better unify the VarType object and the VariableType alias?
-    VariableType = Literal["numeric", "categorical", "datetime", "unknown"]
+    VariableType = Literal["numeric", "categorical", "datetime"]
     DataSource = Union[DataFrame, Mapping[Hashable, Vector], None]