diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 7e61aea2ad..0000000000 --- a/pytest.ini +++ /dev/null @@ -1,5 +0,0 @@ -[pytest] -filterwarnings = -; Warnings raised from within patsy imports - ignore:Using or importing the ABCs:DeprecationWarning -junit_family=xunit1 diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index 2469df0068..4a8fc297b5 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -23,7 +23,7 @@ from seaborn._stats.base import Stat from seaborn._core.data import PlotData from seaborn._core.moves import Move -from seaborn._core.scales import ScaleSpec, Scale +from seaborn._core.scales import Scale from seaborn._core.subplots import Subplots from seaborn._core.groupby import GroupBy from seaborn._core.properties import PROPERTIES, Property, Coordinate @@ -146,7 +146,7 @@ class Plot: _data: PlotData _layers: list[Layer] - _scales: dict[str, ScaleSpec] + _scales: dict[str, Scale] _subplot_spec: dict[str, Any] # TODO values type _facet_spec: FacetSpec @@ -520,7 +520,7 @@ def facet( # TODO def twin()? - def scale(self, **scales: ScaleSpec) -> Plot: + def scale(self, **scales: Scale) -> Plot: """ Control mappings from data units to visual properties. @@ -873,7 +873,7 @@ def _transform_coords(self, p: Plot, common: PlotData, layers: list[Layer]) -> N var_df = pd.DataFrame(columns=cols) prop = Coordinate(axis) - scale_spec = self._get_scale(p, prefix, prop, var_df[var]) + scale = self._get_scale(p, prefix, prop, var_df[var]) # Shared categorical axes are broken on matplotlib<3.4.0. 
# https://github.com/matplotlib/matplotlib/pull/18308 @@ -882,7 +882,7 @@ def _transform_coords(self, p: Plot, common: PlotData, layers: list[Layer]) -> N if Version(mpl.__version__) < Version("3.4.0"): from seaborn._core.scales import Nominal paired_axis = axis in p._pair_spec - cat_scale = isinstance(scale_spec, Nominal) + cat_scale = isinstance(scale, Nominal) ok_dim = {"x": "col", "y": "row"}[axis] shared_axes = share_state not in [False, "none", ok_dim] if paired_axis and cat_scale and shared_axes: @@ -897,7 +897,7 @@ def _transform_coords(self, p: Plot, common: PlotData, layers: list[Layer]) -> N # Setup the scale on all of the data and plug it into self._scales # We do this because by the time we do self._setup_scales, coordinate data # will have been converted to floats already, so scale inference fails - self._scales[var] = scale_spec.setup(var_df[var], prop) + self._scales[var] = scale._setup(var_df[var], prop) # Set up an empty series to receive the transformed values. # We need this to handle piecemeal tranforms of categories -> floats. 
@@ -927,7 +927,7 @@ def _transform_coords(self, p: Plot, common: PlotData, layers: list[Layer]) -> N seed_values = var_df.loc[idx, var] - scale = scale_spec.setup(seed_values, prop, axis=axis_obj) + scale = scale._setup(seed_values, prop, axis=axis_obj) for layer, new_series in zip(layers, transformed_data): layer_df = layer["data"].frame @@ -936,7 +936,7 @@ def _transform_coords(self, p: Plot, common: PlotData, layers: list[Layer]) -> N new_series.loc[idx] = scale(layer_df.loc[idx, var]) # TODO need decision about whether to do this or modify axis transform - set_scale_obj(view["ax"], axis, scale.matplotlib_scale) + set_scale_obj(view["ax"], axis, scale._matplotlib_scale) # Now the transformed data series are complete, set update the layer data for layer, new_series in zip(layers, transformed_data): @@ -1000,11 +1000,11 @@ def _compute_stats(self, spec: Plot, layers: list[Layer]) -> None: def _get_scale( self, spec: Plot, var: str, prop: Property, values: Series - ) -> ScaleSpec: + ) -> Scale: if var in spec._scales: arg = spec._scales[var] - if arg is None or isinstance(arg, ScaleSpec): + if arg is None or isinstance(arg, Scale): scale = arg else: scale = prop.infer_scale(arg, values) @@ -1052,28 +1052,28 @@ def _setup_scales(self, p: Plot, layers: list[Layer]) -> None: axis = m["axis"] prop = PROPERTIES.get(var if axis is None else axis, Property()) - scale_spec = self._get_scale(p, var, prop, var_values) + scale = self._get_scale(p, var, prop, var_values) # Initialize the data-dependent parameters of the scale # Note that this returns a copy and does not mutate the original # This dictionary is used by the semantic mappings - if scale_spec is None: + if scale is None: # TODO what is the cleanest way to implement identity scale? - # We don't really need a ScaleSpec, and Identity() will be + # We don't really need a Scale, and Identity() will be # overloaded anyway (but maybe a general Identity object # that can be used as Scale/Mark/Stat/Move?) 
# Note that this may not be the right spacer to use # (but that is only relevant for coordinates, where identity scale # doesn't make sense or is poorly defined, since we don't use pixels.) - self._scales[var] = Scale([], lambda x: x, None, "identity", None) + self._scales[var] = Scale._identity() else: - scale = scale_spec.setup(var_values, prop) + scale = scale._setup(var_values, prop) if isinstance(prop, Coordinate): # If we have a coordinate here, we didn't assign a scale for it # in _transform_coords, which means it was added during compute_stat # This allows downstream orientation inference to work properly. # But it feels a little hacky, so perhaps revisit. - scale.scale_type = "computed" + scale._priority = 0 # type: ignore self._scales[var] = scale def _plot_layer(self, p: Plot, layer: Layer) -> None: @@ -1097,14 +1097,14 @@ def get_order(var): # sorted unique numbers will correctly reconstruct intended order # TODO This is tricky, make sure we add some tests for this if var not in "xy" and var in scales: - return scales[var].order + return getattr(scales[var], "order", None) if "width" in mark._mappable_props: width = mark._resolve(df, "width", None) else: width = df.get("width", 0.8) # TODO what default if orient in df: - df["width"] = width * scales[orient].spacing(df[orient]) + df["width"] = width * scales[orient]._spacing(df[orient]) if "baseline" in mark._mappable_props: # TODO what marks should have this? 
@@ -1277,7 +1277,7 @@ def _setup_split_generator( v for v in grouping_vars if v in df and v not in ["col", "row"] ] for var in grouping_vars: - order = self._scales[var].order + order = getattr(self._scales[var], "order", None) if order is None: order = categorical_order(df[var]) grouping_keys.append(order) @@ -1357,7 +1357,7 @@ def _update_legend_contents( ]] = [] schema = [] for var in legend_vars: - var_legend = scales[var].legend + var_legend = scales[var]._legend if var_legend is not None: values, labels = var_legend for (_, part_id), part_vars, _ in schema: diff --git a/seaborn/_core/properties.py b/seaborn/_core/properties.py index 95539dc6f4..8a6a4a6523 100644 --- a/seaborn/_core/properties.py +++ b/seaborn/_core/properties.py @@ -8,7 +8,7 @@ from matplotlib.colors import to_rgb, to_rgba, to_rgba_array from matplotlib.path import Path -from seaborn._core.scales import ScaleSpec, Nominal, Continuous, Temporal +from seaborn._core.scales import Scale, Nominal, Continuous, Temporal from seaborn._core.rules import categorical_order, variable_type from seaborn._compat import MarkerStyle from seaborn.palettes import QUAL_PALETTES, color_palette, blend_palette @@ -59,7 +59,7 @@ def __init__(self, variable: str | None = None): variable = self.__class__.__name__.lower() self.variable = variable - def default_scale(self, data: Series) -> ScaleSpec: + def default_scale(self, data: Series) -> Scale: """Given data, initialize appropriate scale class.""" # TODO allow variable_type to be "boolean" if that's a scale? 
# TODO how will this handle data with units that can be treated as numeric @@ -75,7 +75,7 @@ def default_scale(self, data: Series) -> ScaleSpec: else: return Nominal() - def infer_scale(self, arg: Any, data: Series) -> ScaleSpec: + def infer_scale(self, arg: Any, data: Series) -> Scale: """Given data and a scaling argument, initialize appropriate scale class.""" # TODO put these somewhere external for validation # TODO putting this here won't pick it up if subclasses define infer_scale @@ -86,7 +86,7 @@ def infer_scale(self, arg: Any, data: Series) -> ScaleSpec: if isinstance(arg, str): if any(arg.startswith(k) for k in trans_args): # TODO validate numeric type? That should happen centrally somewhere - return Continuous(transform=arg) + return Continuous(trans=arg) else: msg = f"Unknown magic arg for {self.variable} scale: '{arg}'." raise ValueError(msg) @@ -96,7 +96,7 @@ def infer_scale(self, arg: Any, data: Series) -> ScaleSpec: raise TypeError(msg) def get_mapping( - self, scale: ScaleSpec, data: Series + self, scale: Scale, data: Series ) -> Callable[[ArrayLike], ArrayLike]: """Return a function that maps from data domain to property range.""" def identity(x): @@ -176,7 +176,7 @@ def _inverse(self, values: ArrayLike) -> ArrayLike: """Transform applied to results of mapping that returns to native values.""" return values - def infer_scale(self, arg: Any, data: Series) -> ScaleSpec: + def infer_scale(self, arg: Any, data: Series) -> Scale: """Given data and a scaling argument, initialize appropriate scale class.""" # TODO infer continuous based on log/sqrt etc? 
@@ -192,7 +192,7 @@ def infer_scale(self, arg: Any, data: Series) -> ScaleSpec: return Continuous(arg) def get_mapping( - self, scale: ScaleSpec, data: ArrayLike + self, scale: Scale, data: ArrayLike ) -> Callable[[ArrayLike], ArrayLike]: """Return a function that maps from data domain to property range.""" if isinstance(scale, Nominal): @@ -325,7 +325,7 @@ def infer_scale(self, arg: Any, data: Series) -> Nominal: return Nominal(arg) def get_mapping( - self, scale: ScaleSpec, data: Series, + self, scale: Scale, data: Series, ) -> Callable[[ArrayLike], list]: """Define mapping as lookup into list of object values.""" order = getattr(scale, "order", None) @@ -532,7 +532,7 @@ def has_alpha(x): else: return to_rgba_array(colors)[:, :3] - def infer_scale(self, arg: Any, data: Series) -> ScaleSpec: + def infer_scale(self, arg: Any, data: Series) -> Scale: # TODO when inferring Continuous without data, verify type # TODO need to rethink the variable type system @@ -617,7 +617,7 @@ def mapping(x): return mapping def get_mapping( - self, scale: ScaleSpec, data: Series + self, scale: Scale, data: Series ) -> Callable[[ArrayLike], ArrayLike]: """Return a function that maps from data domain to color values.""" # TODO what is best way to do this conditional? @@ -690,13 +690,13 @@ def default_scale(self, data: Series) -> Nominal: """Given data, initialize appropriate scale class.""" return Nominal() - def infer_scale(self, arg: Any, data: Series) -> ScaleSpec: + def infer_scale(self, arg: Any, data: Series) -> Scale: """Given data and a scaling argument, initialize appropriate scale class.""" # TODO infer Boolean where possible? 
return Nominal(arg) def get_mapping( - self, scale: ScaleSpec, data: Series + self, scale: Scale, data: Series ) -> Callable[[ArrayLike], ArrayLike]: """Return a function that maps each data value to True or False.""" # TODO categorical_order is going to return [False, True] for booleans, diff --git a/seaborn/_core/scales.py b/seaborn/_core/scales.py index 61a35f787e..a3cdfb28f6 100644 --- a/seaborn/_core/scales.py +++ b/seaborn/_core/scales.py @@ -1,11 +1,12 @@ from __future__ import annotations import re from copy import copy +from collections.abc import Sequence from dataclasses import dataclass from functools import partial +from typing import Any, Callable, Tuple, Optional, Union, ClassVar import numpy as np -import pandas as pd import matplotlib as mpl from matplotlib.ticker import ( Locator, @@ -15,9 +16,14 @@ FixedLocator, LinearLocator, LogLocator, + SymmetricalLogLocator, MaxNLocator, MultipleLocator, + EngFormatter, + FuncFormatter, + LogFormatterSciNotation, ScalarFormatter, + StrMethodFormatter, ) from matplotlib.dates import ( AutoDateLocator, @@ -25,158 +31,150 @@ ConciseDateFormatter, ) from matplotlib.axis import Axis +from matplotlib.scale import ScaleBase +from pandas import Series from seaborn._core.rules import categorical_order from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Callable, Tuple, Optional, Union - from collections.abc import Sequence - from matplotlib.scale import ScaleBase as MatplotlibScale - from pandas import Series - from numpy.typing import ArrayLike from seaborn._core.properties import Property + from numpy.typing import ArrayLike Transforms = Tuple[ Callable[[ArrayLike], ArrayLike], Callable[[ArrayLike], ArrayLike] ] - # TODO standardize String / ArrayLike interface Pipeline = Sequence[Optional[Callable[[Union[Series, ArrayLike]], ArrayLike]]] class Scale: - def __init__( - self, - forward_pipe: Pipeline, - spacer: Callable[[Series], float], - legend: tuple[list[Any], list[str]] | None, - 
scale_type: str, - matplotlib_scale: MatplotlibScale, - ): - - self.forward_pipe = forward_pipe - self.spacer = spacer - self.legend = legend - self.scale_type = scale_type - self.matplotlib_scale = matplotlib_scale - - # TODO need to make this work - self.order = None - - def __call__(self, data: Series) -> ArrayLike: - - return self._apply_pipeline(data, self.forward_pipe) + values: tuple | str | list | dict | None - # TODO def as_identity(cls): ? + _priority: ClassVar[int] + _pipeline: Pipeline + _matplotlib_scale: ScaleBase + _spacer: staticmethod + _legend: tuple[list[str], list[Any]] | None - def _apply_pipeline( - self, data: ArrayLike, pipeline: Pipeline, - ) -> ArrayLike: - - # TODO sometimes we need to handle scalars (e.g. for Line) - # but what is the best way to do that? - scalar_data = np.isscalar(data) - if scalar_data: - data = np.array([data]) + def __post_init__(self): - for func in pipeline: - if func is not None: - data = func(data) + self._tick_params = None + self._label_params = None + self._legend = None - if scalar_data: - data = data[0] + def tick(self): + raise NotImplementedError() - return data + def label(self): + raise NotImplementedError() - def spacing(self, data: Series) -> float: - return self.spacer(data) + def _get_locators(self): + raise NotImplementedError() - def invert_axis_transform(self, x): - # TODO we may no longer need this method as we use the axis - # transform directly in Plotter._unscale_coords - finv = self.matplotlib_scale.get_transform().inverted().transform - out = finv(x) - if isinstance(x, pd.Series): - return pd.Series(out, index=x.index, name=x.name) - return out + def _get_formatter(self, locator: Locator | None = None): + raise NotImplementedError() + def _get_scale(self, name: str, forward: Callable, inverse: Callable): -@dataclass -class ScaleSpec: + major_locator, minor_locator = self._get_locators(**self._tick_params) + major_formatter = self._get_formatter(major_locator, **self._label_params) - values: 
tuple | str | list | dict | None = None + class InternalScale(mpl.scale.FuncScale): + def set_default_locators_and_formatters(self, axis): + axis.set_major_locator(major_locator) + if minor_locator is not None: + axis.set_minor_locator(minor_locator) + axis.set_major_formatter(major_formatter) - ... - # TODO have Scale define width (/height?) ('space'?) (using data?), so e.g. nominal - # scale sets width=1, continuous scale sets width min(diff(unique(data))), etc. + return InternalScale(name, (forward, inverse)) - def __post_init__(self): + def _spacing(self, x: Series) -> float: + return self._spacer(x) - # TODO do we need anything else here? - self.tick() - self.format() + def _setup( + self, data: Series, prop: Property, axis: Axis | None = None, + ) -> Scale: + raise NotImplementedError() - def tick(self): - # TODO what is the right base method? - self._major_locator: Locator - self._minor_locator: Locator - return self + def __call__(self, data: Series) -> ArrayLike: - def format(self): - self._major_formatter: Formatter - return self + # TODO sometimes we need to handle scalars (e.g. for Line) + # but what is the best way to do that? + scalar_data = np.isscalar(data) + if scalar_data: + data = np.array([data]) - def setup( - self, data: Series, prop: Property, axis: Axis | None = None, - ) -> Scale: - ... 
+ for func in self._pipeline: + if func is not None: + data = func(data) - # TODO typing - def _get_scale(self, name, forward, inverse): + if scalar_data: + data = data[0] - major_locator = self._major_locator - minor_locator = self._minor_locator + return data - # TODO hack, need to add default to Continuous - major_formatter = getattr(self, "_major_formatter", ScalarFormatter()) - # major_formatter = self._major_formatter + @staticmethod + def _identity(): - class Scale(mpl.scale.FuncScale): - def set_default_locators_and_formatters(self, axis): - axis.set_major_locator(major_locator) - if minor_locator is not None: - axis.set_minor_locator(minor_locator) - axis.set_major_formatter(major_formatter) + class Identity(Scale): + _pipeline = [] + _spacer = None + _legend = None + _matplotlib_scale = None - return Scale(name, (forward, inverse)) + return Identity() @dataclass -class Nominal(ScaleSpec): +class Nominal(Scale): """ A categorical scale without relative importance / magnitude. """ # Categorical (convert to strings), un-sortable + values: tuple | str | list | dict | None = None order: list | None = None - def setup( + _priority: ClassVar[int] = 3 + + def _setup( self, data: Series, prop: Property, axis: Axis | None = None, ) -> Scale: - class CatScale(mpl.scale.LinearScale): - # TODO turn this into a real thing I guess - name = None # To work around mpl<3.4 compat issues - - def set_default_locators_and_formatters(self, axis): - pass + new = copy(self) + if new._tick_params is None: + new = new.tick() + if new._label_params is None: + new = new.label() # TODO flexibility over format() which isn't great for numbers / dates stringify = np.vectorize(format) - units_seed = categorical_order(data, self.order) + units_seed = categorical_order(data, new.order) + + # TODO move to Nominal._get_scale? 
+ # TODO this needs some more complicated rethinking about how to pass + # a unit dictionary down to these methods, along with how much we want + # to invest in their API. What is it useful for tick() to do here? + # (Ordinal may be different if we draw that contrast). + # Any customization we do to allow, e.g., label wrapping will probably + # require defining our own Formatter subclass. + # We could also potentially implement auto-wrapping in an Axis subclass + # (see Axis.draw ... it already is computing the bboxes). + # major_locator, minor_locator = new._get_locators(**new._tick_params) + # major_formatter = new._get_formatter(major_locator, **new._label_params) + + class CatScale(mpl.scale.LinearScale): + name = None # To work around mpl<3.4 compat issues + + def set_default_locators_and_formatters(self, axis): + ... + # axis.set_major_locator(major_locator) + # if minor_locator is not None: + # axis.set_minor_locator(minor_locator) + # axis.set_major_formatter(major_formatter) mpl_scale = CatScale(data.name) if axis is None: @@ -187,6 +185,8 @@ def set_default_locators_and_formatters(self, axis): # and (B) allow the values parameter for a Coordinate to set xlim/ylim axis.set_view_interval(0, len(units_seed) - 1) + new._matplotlib_scale = mpl_scale + # TODO array cast necessary to handle float/int mixture, which we need # to solve in a more systematic way probably # (i.e. if we have [1, 2.5], do we want [1.0, 2.5]? Unclear) @@ -204,64 +204,134 @@ def convert_units(x): out[keep] = axis.convert_units(stringify(x[keep])) return out - forward_pipe = [ + new._pipeline = [ convert_units, - prop.get_mapping(self, data), + prop.get_mapping(new, data), # TODO how to handle color representation consistency? 
] def spacer(x): return 1 + new._spacer = spacer + if prop.legend: - legend = units_seed, list(stringify(units_seed)) - else: - legend = None + new._legend = units_seed, list(stringify(units_seed)) - scale_type = self.__class__.__name__.lower() - scale = Scale(forward_pipe, spacer, legend, scale_type, mpl_scale) - return scale + return new + + def tick(self, locator: Locator | None = None): + """ + Configure the selection of ticks for the scale's axis or legend. + + .. note:: + This API is under construction and will be enhanced over time. + At the moment, it is probably not very useful. + + Parameters + ---------- + locator: :class:`matplotlib.ticker.Locator` subclass + Pre-configured matplotlib locator; other parameters will not be used. + + Returns + ------- + Copy of self with new tick configuration. + + """ + new = copy(self) + new._tick_params = { + "locator": locator, + } + return new + + def label(self, formatter: Formatter | None = None): + """ + Configure the selection of labels for the scale's axis or legend. + + .. note:: + This API is under construction and will be enhanced over time. + At the moment, it is probably not very useful. + + Parameters + ---------- + formatter: :class:`matplotlib.ticker.Formatter` subclass + Pre-configured matplotlib formatter; other parameters will not be used. + + Returns + ------- + Copy of self with new label configuration. + + """ + new = copy(self) + new._label_params = { + "formatter": formatter, + } + return new + + def _get_locators(self, locator): + + if locator is not None: + return locator, None + + locator = mpl.category.StrCategoryLocator({}) + + return locator, None + + def _get_formatter(self, locator, formatter): + + if formatter is not None: + return formatter + + formatter = mpl.category.StrCategoryFormatter({}) + + return formatter @dataclass -class Ordinal(ScaleSpec): +class Ordinal(Scale): # Categorical (convert to strings), sortable, can skip ticklabels ... 
@dataclass -class Discrete(ScaleSpec): +class Discrete(Scale): # Numeric, integral, can skip ticks/ticklabels ... @dataclass -class ContinuousBase(ScaleSpec): +class ContinuousBase(Scale): values: tuple | str | None = None norm: tuple | None = None - def setup( + def _setup( self, data: Series, prop: Property, axis: Axis | None = None, ) -> Scale: new = copy(self) - forward, inverse = self._get_transform() + if new._tick_params is None: + new = new.tick() + if new._label_params is None: + new = new.label() - mpl_scale = self._get_scale(data.name, forward, inverse) + forward, inverse = new._get_transform() + + mpl_scale = new._get_scale(data.name, forward, inverse) if axis is None: axis = PseudoAxis(mpl_scale) axis.update_units(data) mpl_scale.set_default_locators_and_formatters(axis) + new._matplotlib_scale = mpl_scale normalize: Optional[Callable[[ArrayLike], ArrayLike]] if prop.normed: - if self.norm is None: + if new.norm is None: vmin, vmax = data.min(), data.max() else: - vmin, vmax = self.norm + vmin, vmax = new.norm vmin, vmax = axis.convert_units((vmin, vmax)) a = forward(vmin) b = forward(vmax) - forward(vmin) @@ -272,7 +342,7 @@ def normalize(x): else: normalize = vmin = vmax = None - forward_pipe = [ + new._pipeline = [ axis.convert_units, forward, normalize, @@ -281,24 +351,24 @@ def normalize(x): def spacer(x): return np.min(np.diff(np.sort(x.dropna().unique()))) + new._spacer = spacer - # TODO make legend optional on per-plot basis with ScaleSpec parameter? + # TODO How to allow disabling of legend for all uses of property? + # Could add a Scale parameter, or perhaps Scale.suppress()? + # Are there other useful parameters that would be in Scale.legend() + # besides allowing Scale.legend(False)? 
if prop.legend: axis.set_view_interval(vmin, vmax) locs = axis.major.locator() locs = locs[(vmin <= locs) & (locs <= vmax)] labels = axis.major.formatter.format_ticks(locs) - legend = list(locs), list(labels) - - else: - legend = None + new._legend = list(locs), list(labels) - scale_type = self.__class__.__name__.lower() - return Scale(forward_pipe, spacer, legend, scale_type, mpl_scale) + return new def _get_transform(self): - arg = self.transform + arg = self.trans def get_param(method, default): if arg == method: @@ -327,8 +397,7 @@ def get_param(method, default): elif arg == "sqrt": return _make_sqrt_transforms() else: - # TODO useful error message - raise ValueError() + raise ValueError(f"Unknown value provided for trans: {arg!r}") @dataclass @@ -336,13 +405,13 @@ class Continuous(ContinuousBase): """ A numeric scale supporting norms and functional transforms. """ - transform: str | Transforms | None = None + values: tuple | str | None = None + trans: str | Transforms | None = None # TODO Add this to deal with outliers? # outside: Literal["keep", "drop", "clip"] = "keep" - # TODO maybe expose matplotlib more directly like this? - # def using(self, scale: mpl.scale.ScaleBase) ? + _priority: ClassVar[int] = 1 def tick( self, @@ -353,13 +422,13 @@ def tick( every: float | None = None, between: tuple[float, float] | None = None, minor: int | None = None, - ) -> Continuous: # TODO type return value as Self + ) -> Continuous: """ Configure the selection of ticks for the scale's axis or legend. Parameters ---------- - locator: matplotlib Locator + locator: :class:`matplotlib.ticker.Locator` subclass Pre-configured matplotlib locator; other parameters will not be used. at : sequence of floats Place ticks at these specific locations (in data units). @@ -376,47 +445,117 @@ def tick( Returns ------- - Returns self with new tick configuration. + Copy of self with new tick configuration. 
""" + # Input checks + if locator is not None and not isinstance(locator, Locator): + raise TypeError( + f"Tick locator must be an instance of {Locator!r}, " + f"not {type(locator)!r}." + ) + log_base, symlog_thresh = self._parse_for_log_params(self.trans) + if log_base or symlog_thresh: + if count is not None and between is None: + raise RuntimeError("`count` requires `between` with log transform.") + if every is not None: + raise RuntimeError("`every` not supported with log transform.") - # TODO what about symlog? - if isinstance(self.transform, str): - m = re.match(r"log(\d*)", self.transform) - log_transform = m is not None - log_base = m[1] or 10 if m is not None else None - forward, inverse = self._get_transform() - else: - log_transform = False - log_base = forward = inverse = None + new = copy(self) + new._tick_params = { + "locator": locator, + "at": at, + "upto": upto, + "count": count, + "every": every, + "between": between, + "minor": minor, + } + return new + + def label( + self, + formatter: Formatter | None = None, *, + like: str | Callable | None = None, + base: int | None = None, + unit: str | None = None, + ) -> Continuous: + """ + Configure the appearance of tick labels for the scale's axis or legend. + + Parameters + ---------- + formatter: :class:`matplotlib.ticker.Formatter` subclass + Pre-configured formatter to use; other parameters will be ignored. + like : str or callable + Either a format pattern (e.g., `".2f"`), a format string with fields named + `x` and/or `pos` (e.g., `"${x:.2f}"`), or a callable that consumes a number + and returns a string. + base : number + Use log formatter (with scientific notation) having this value as the base. + unit : str or (str, str) tuple + Use SI prefixes with these units (e.g., with `unit="g"`, a tick value + of 5000 will appear as `5 kg`). When a tuple, the first element gives the + separator between the number and unit. + + Returns + ------- + Copy of self with new label configuration. 
+ + """ + # Input checks + if formatter is not None and not isinstance(formatter, Formatter): + raise TypeError( + f"Label formatter must be an instance of {Formatter!r}, " + f"not {type(formatter)!r}" + ) + if like is not None and not (isinstance(like, str) or callable(like)): + msg = f"`like` must be a string or callable, not {type(like).__name__}." + raise TypeError(msg) + + new = copy(self) + new._label_params = { + "formatter": formatter, + "like": like, + "base": base, + "unit": unit, + } + return new + + def _parse_for_log_params( + self, trans: str | Transforms | None + ) -> tuple[float | None, float | None]: + + log_base = symlog_thresh = None + if isinstance(trans, str): + m = re.match(r"^log(\d*)", trans) + if m is not None: + log_base = float(m[1] or 10) + m = re.match(r"symlog(\d*)", trans) + if m is not None: + symlog_thresh = float(m[1] or 1) + return log_base, symlog_thresh + + def _get_locators(self, locator, at, upto, count, every, between, minor): + + log_base, symlog_thresh = self._parse_for_log_params(self.trans) if locator is not None: - # TODO accept tuple for major, minor? - if not isinstance(locator, Locator): - err = ( - f"Tick locator must be an instance of {Locator!r}, " - f"not {type(locator)!r}." - ) - raise TypeError(err) major_locator = locator - # TODO raise if locator is passed with any other parameters - elif upto is not None: - if log_transform: + if log_base: major_locator = LogLocator(base=log_base, numticks=upto) else: major_locator = MaxNLocator(upto, steps=[1, 1.5, 2, 2.5, 3, 5, 10]) elif count is not None: if between is None: - if log_transform: - msg = "`count` requires `between` with log transform." 
- raise RuntimeError(msg) # This is rarely useful (unless you are setting limits) major_locator = LinearLocator(count) else: - if log_transform: + if log_base or symlog_thresh: + forward, inverse = self._get_transform() lo, hi = forward(between) ticks = inverse(np.linspace(lo, hi, num=count)) else: @@ -424,9 +563,6 @@ def tick( major_locator = FixedLocator(ticks) elif every is not None: - if log_transform: - msg = "`every` not supported with log transform." - raise RuntimeError(msg) if between is None: major_locator = MultipleLocator(every) else: @@ -438,24 +574,60 @@ def tick( major_locator = FixedLocator(at) else: - major_locator = LogLocator(log_base) if log_transform else AutoLocator() + if log_base: + major_locator = LogLocator(log_base) + elif symlog_thresh: + major_locator = SymmetricalLogLocator(linthresh=symlog_thresh, base=10) + else: + major_locator = AutoLocator() if minor is None: - minor_locator = LogLocator(log_base, subs=None) if log_transform else None + minor_locator = LogLocator(log_base, subs=None) if log_base else None else: - if log_transform: + if log_base: subs = np.linspace(0, log_base, minor + 2)[1:-1] minor_locator = LogLocator(log_base, subs=subs) else: minor_locator = AutoMinorLocator(minor + 1) - self._major_locator = major_locator - self._minor_locator = minor_locator + return major_locator, minor_locator + + def _get_formatter(self, locator, formatter, like, base, unit): + + log_base, symlog_thresh = self._parse_for_log_params(self.trans) + if base is None: + if symlog_thresh: + log_base = 10 + base = log_base + + if formatter is not None: + return formatter + + if like is not None: + if isinstance(like, str): + if "{x" in like or "{pos" in like: + fmt = like + else: + fmt = f"{{x:{like}}}" + formatter = StrMethodFormatter(fmt) + else: + formatter = FuncFormatter(like) - return self + elif base is not None: + # We could add other log options if necessary + formatter = LogFormatterSciNotation(base) - # TODO need to fill this out - # 
def format(self, ...): + elif unit is not None: + if isinstance(unit, tuple): + sep, unit = unit + else: + sep = " " + formatter = EngFormatter(unit, sep=sep) + + else: + formatter = ScalarFormatter() + + return formatter @dataclass @@ -473,65 +645,107 @@ class Temporal(ContinuousBase): # those yet, and having a clear distinction betewen date(time) / time # may be more useful. - transform = None + trans = None + + _priority: ClassVar[int] = 2 def tick( self, locator: Locator | None = None, *, upto: int | None = None, ) -> Temporal: + """ + Configure the selection of ticks for the scale's axis or legend. + + .. note:: + This API is under construction and will be enhanced over time. + + Parameters + ---------- + locator: :class:`matplotlib.ticker.Locator` subclass + Pre-configured matplotlib locator; other parameters will not be used. + upto : int + Choose "nice" locations for ticks, but do not exceed this number. + + Returns + ------- + Copy of self with new tick configuration. + + """ + if locator is not None and not isinstance(locator, Locator): + err = ( + f"Tick locator must be an instance of {Locator!r}, " + f"not {type(locator)!r}." + ) + raise TypeError(err) + + new = copy(self) + new._tick_params = {"locator": locator, "upto": upto} + return new + + def label( + self, + formatter: Formatter | None = None, *, + concise: bool = False, + ) -> Temporal: + """ + Configure the appearance of tick labels for the scale's axis or legend. + + .. note:: + This API is under construction and will be enhanced over time. + + Parameters + ---------- + formatter: :class:`matplotlib.ticker.Formatter` subclass + Pre-configured formatter to use; other parameters will be ignored. + concise : bool + If True, use :class:`matplotlib.dates.ConciseDateFormatter` to make + the tick labels as compact as possible. + + Returns + ------- + Copy of self with new label configuration. 
+ + """ + new = copy(self) + new._label_params = {"formatter": formatter, "concise": concise} + return new + + def _get_locators(self, locator, upto): if locator is not None: - # TODO accept tuple for major, minor? - if not isinstance(locator, Locator): - err = ( - f"Tick locator must be an instance of {Locator!r}, " - f"not {type(locator)!r}." - ) - raise TypeError(err) major_locator = locator - elif upto is not None: - # TODO atleast for minticks? major_locator = AutoDateLocator(minticks=2, maxticks=upto) else: major_locator = AutoDateLocator(minticks=2, maxticks=6) + minor_locator = None - self._major_locator = major_locator - self._minor_locator = None + return major_locator, minor_locator - self.format() + def _get_formatter(self, locator, formatter, concise): - return self - - def format( - self, formater: Formatter | None = None, *, - concise: bool = False, - ) -> Temporal: + if formatter is not None: + return formatter - # TODO ideally we would have concise coordinate ticks, - # but full semantic ticks. Is that possible? if concise: - major_formatter = ConciseDateFormatter(self._major_locator) + # TODO ideally we would have concise coordinate ticks, + # but full semantic ticks. Is that possible? + formatter = ConciseDateFormatter(locator) else: - major_formatter = AutoDateFormatter(self._major_locator) - self._major_formatter = major_formatter + formatter = AutoDateFormatter(locator) - return self + return formatter # ----------------------------------------------------------------------------------- # -class Calendric(ScaleSpec): - # TODO have this separate from Temporal or have Temporal(date=True) or similar? - ... - - -class Binned(ScaleSpec): - # Needed? Or handle this at layer (in stat or as param, eg binning=) - ... +# TODO Have this separate from Temporal or have Temporal(date=True) or similar? +# class Calendric(Scale): +# TODO Needed? 
Or handle this at layer (in stat or as param, eg binning=) +# class Binned(Scale): # TODO any need for color-specific scales? # class Sequential(Continuous): @@ -552,7 +766,7 @@ class PseudoAxis: code, this object acts like an Axis and can be used to scale other variables. """ - axis_name = "" # TODO Needs real value? Just used for x/y logic in matplotlib + axis_name = "" # Matplotlib requirement but not actually used def __init__(self, scale): @@ -567,11 +781,9 @@ def __init__(self, scale): self._data_interval = None, None scale.set_default_locators_and_formatters(self) - # self.set_default_intervals() TODO mock? + # self.set_default_intervals() Is this ever needed? def set_view_interval(self, vmin, vmax): - # TODO this gets called when setting DateTime units, - # but we may not need it to do anything self._view_interval = vmin, vmax def get_view_interval(self): @@ -598,8 +810,6 @@ def set_major_locator(self, locator): locator.set_axis(self) def set_major_formatter(self, formatter): - # TODO matplotlib method does more handling (e.g. to set w/format str) - # We will probably handle that in the tick/format interface, though self.major.formatter = formatter formatter.set_axis(self) @@ -625,12 +835,11 @@ def update_units(self, x): if info is None: return if info.majloc is not None: - # TODO matplotlib method has more conditions here; are they needed? self.set_major_locator(info.majloc) if info.majfmt is not None: self.set_major_formatter(info.majfmt) - # TODO this is in matplotlib method; do we need this? + # This is in matplotlib method; do we need this? # self.set_default_intervals() def convert_units(self, x): @@ -642,17 +851,19 @@ def convert_units(self, x): return self.converter.convert(x, self.units, self) def get_scale(self): - # TODO matplotlib actually returns a string here! + # Note that matplotlib actually returns a string here! 
+ # (e.g., with a log scale, axis.get_scale() returns "log") # Currently we just hit it with minor ticks where it checks for # scale == "log". I'm not sure how you'd actually use log-scale - # minor "ticks" in a legend context, so this is fine..... + # minor "ticks" in a legend context, so this is fine.... return self.scale def get_majorticklocs(self): return self.major.locator() -# ------------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------------ # +# Transform function creation def _make_identity_transforms() -> Transforms: diff --git a/seaborn/_marks/base.py b/seaborn/_marks/base.py index 5bf7d3da1a..5b796f8b39 100644 --- a/seaborn/_marks/base.py +++ b/seaborn/_marks/base.py @@ -1,19 +1,24 @@ from __future__ import annotations from dataclasses import dataclass, fields, field +from typing import Any, Callable, Union +from collections.abc import Generator import numpy as np import pandas as pd import matplotlib as mpl -from seaborn._core.properties import PROPERTIES, Property - -from typing import Any, Callable, Union -from collections.abc import Generator from numpy import ndarray from pandas import DataFrame from matplotlib.artist import Artist -from seaborn._core.properties import RGBATuple, DashPattern, DashPatternWithOffset + from seaborn._core.scales import Scale +from seaborn._core.properties import ( + PROPERTIES, + Property, + RGBATuple, + DashPattern, + DashPatternWithOffset, +) class Mappable: @@ -184,27 +189,11 @@ def _infer_orient(self, scales: dict) -> str: # TODO type scales # TODO rethink this to map from scale type to "DV priority" and use that? # e.g. 
Nominal > Discrete > Continuous - x_type = None if "x" not in scales else scales["x"].scale_type - y_type = None if "y" not in scales else scales["y"].scale_type + x = 0 if "x" not in scales else scales["x"]._priority + y = 0 if "y" not in scales else scales["y"]._priority - if x_type is None or x_type == "computed": + if y > x: return "y" - - elif y_type is None or y_type == "computed": - return "x" - - elif x_type != "nominal" and y_type == "nominal": - return "y" - - elif x_type != "continuous" and y_type == "continuous": - - # TODO should we try to orient based on number of unique values? - - return "x" - - elif x_type == "continuous" and y_type != "continuous": - return "y" - else: return "x" diff --git a/seaborn/_stats/histograms.py b/seaborn/_stats/histograms.py index 5e2a565f7b..8f069b64e5 100644 --- a/seaborn/_stats/histograms.py +++ b/seaborn/_stats/histograms.py @@ -117,7 +117,10 @@ def _normalize(self, data, orient): def __call__(self, data, groupby, orient, scales): - scale_type = scales[orient].scale_type + # TODO better to do this as an isinstance check? + # We are only asking about Nominal scales now, + # but presumably would apply to Ordinal too? 
+ scale_type = scales[orient].__class__.__name__.lower() grouping_vars = [v for v in data if v in groupby.order] if not grouping_vars or self.common_bins is True: bin_kws = self._define_bin_params(data, orient, scale_type) diff --git a/tests/_core/test_plot.py b/tests/_core/test_plot.py index b07922d6e4..63d3767fc6 100644 --- a/tests/_core/test_plot.py +++ b/tests/_core/test_plot.py @@ -319,10 +319,10 @@ class TestScaling: def test_inference(self, long_df): - for col, scale_type in zip("zat", ["continuous", "nominal", "temporal"]): + for col, scale_type in zip("zat", ["Continuous", "Nominal", "Temporal"]): p = Plot(long_df, x=col, y=col).add(MockMark()).plot() for var in "xy": - assert p._scales[var].scale_type == scale_type + assert p._scales[var].__class__.__name__ == scale_type def test_inference_from_layer_data(self): @@ -551,7 +551,7 @@ def __call__(self, data, groupby, orient, scales): s = MockStat() y = ["a", "a", "b", "c"] Plot(y=y).add(MockMark(), s).plot() - assert s.scales["y"].scale_type == "nominal" + assert s.scales["y"].__class__.__name__ == "Nominal" # TODO where should RGB consistency be enforced? 
@pytest.mark.xfail( diff --git a/tests/_core/test_scales.py b/tests/_core/test_scales.py index 5883d9f2d4..f16a6d3629 100644 --- a/tests/_core/test_scales.py +++ b/tests/_core/test_scales.py @@ -1,3 +1,4 @@ +import re import numpy as np import pandas as pd @@ -22,6 +23,7 @@ Fill, ) from seaborn.palettes import color_palette +from seaborn.external.version import Version class TestContinuous: @@ -30,90 +32,106 @@ class TestContinuous: def x(self): return pd.Series([1, 3, 9], name="x", dtype=float) + def setup_ticks(self, x, *args, **kwargs): + + s = Continuous().tick(*args, **kwargs)._setup(x, Coordinate()) + a = PseudoAxis(s._matplotlib_scale) + a.set_view_interval(0, 1) + return a + + def setup_labels(self, x, *args, **kwargs): + + s = Continuous().label(*args, **kwargs)._setup(x, Coordinate()) + a = PseudoAxis(s._matplotlib_scale) + a.set_view_interval(0, 1) + locs = a.major.locator() + return a, locs + def test_coordinate_defaults(self, x): - s = Continuous().setup(x, Coordinate()) + s = Continuous()._setup(x, Coordinate()) assert_series_equal(s(x), x) - assert_series_equal(s.invert_axis_transform(s(x)), x) def test_coordinate_transform(self, x): - s = Continuous(transform="log").setup(x, Coordinate()) + s = Continuous(trans="log")._setup(x, Coordinate()) assert_series_equal(s(x), np.log10(x)) - assert_series_equal(s.invert_axis_transform(s(x)), x) def test_coordinate_transform_with_parameter(self, x): - s = Continuous(transform="pow3").setup(x, Coordinate()) + s = Continuous(trans="pow3")._setup(x, Coordinate()) assert_series_equal(s(x), np.power(x, 3)) - assert_series_equal(s.invert_axis_transform(s(x)), x) + + def test_coordinate_transform_error(self, x): + + s = Continuous(trans="bad") + with pytest.raises(ValueError, match="Unknown value provided"): + s._setup(x, Coordinate()) def test_interval_defaults(self, x): - s = Continuous().setup(x, IntervalProperty()) + s = Continuous()._setup(x, IntervalProperty()) assert_array_equal(s(x), [0, .25, 1]) def 
test_interval_with_range(self, x): - s = Continuous((1, 3)).setup(x, IntervalProperty()) + s = Continuous((1, 3))._setup(x, IntervalProperty()) assert_array_equal(s(x), [1, 1.5, 3]) def test_interval_with_norm(self, x): - s = Continuous(norm=(3, 7)).setup(x, IntervalProperty()) + s = Continuous(norm=(3, 7))._setup(x, IntervalProperty()) assert_array_equal(s(x), [-.5, 0, 1.5]) def test_interval_with_range_norm_and_transform(self, x): x = pd.Series([1, 10, 100]) # TODO param order? - s = Continuous((2, 3), (10, 100), "log").setup(x, IntervalProperty()) + s = Continuous((2, 3), (10, 100), "log")._setup(x, IntervalProperty()) assert_array_equal(s(x), [1, 2, 3]) def test_color_defaults(self, x): cmap = color_palette("ch:", as_cmap=True) - s = Continuous().setup(x, Color()) + s = Continuous()._setup(x, Color()) assert_array_equal(s(x), cmap([0, .25, 1])[:, :3]) # FIXME RGBA def test_color_named_values(self, x): cmap = color_palette("viridis", as_cmap=True) - s = Continuous("viridis").setup(x, Color()) + s = Continuous("viridis")._setup(x, Color()) assert_array_equal(s(x), cmap([0, .25, 1])[:, :3]) # FIXME RGBA def test_color_tuple_values(self, x): cmap = color_palette("blend:b,g", as_cmap=True) - s = Continuous(("b", "g")).setup(x, Color()) + s = Continuous(("b", "g"))._setup(x, Color()) assert_array_equal(s(x), cmap([0, .25, 1])[:, :3]) # FIXME RGBA def test_color_callable_values(self, x): cmap = color_palette("light:r", as_cmap=True) - s = Continuous(cmap).setup(x, Color()) + s = Continuous(cmap)._setup(x, Color()) assert_array_equal(s(x), cmap([0, .25, 1])[:, :3]) # FIXME RGBA def test_color_with_norm(self, x): cmap = color_palette("ch:", as_cmap=True) - s = Continuous(norm=(3, 7)).setup(x, Color()) + s = Continuous(norm=(3, 7))._setup(x, Color()) assert_array_equal(s(x), cmap([-.5, 0, 1.5])[:, :3]) # FIXME RGBA def test_color_with_transform(self, x): x = pd.Series([1, 10, 100], name="x", dtype=float) cmap = color_palette("ch:", as_cmap=True) - s = 
Continuous(transform="log").setup(x, Color()) + s = Continuous(trans="log")._setup(x, Color()) assert_array_equal(s(x), cmap([0, .5, 1])[:, :3]) # FIXME RGBA def test_tick_locator(self, x): locs = [.2, .5, .8] locator = mpl.ticker.FixedLocator(locs) - s = Continuous().tick(locator).setup(x, Coordinate()) - a = PseudoAxis(s.matplotlib_scale) - a.set_view_interval(0, 1) + a = self.setup_ticks(x, locator) assert_array_equal(a.major.locator(), locs) def test_tick_locator_input_check(self, x): @@ -125,60 +143,46 @@ def test_tick_locator_input_check(self, x): def test_tick_upto(self, x): for n in [2, 5, 10]: - s = Continuous().tick(upto=n).setup(x, Coordinate()) - a = PseudoAxis(s.matplotlib_scale) - a.set_view_interval(0, 1) + a = self.setup_ticks(x, upto=n) assert len(a.major.locator()) <= (n + 1) def test_tick_every(self, x): for d in [.05, .2, .5]: - s = Continuous().tick(every=d).setup(x, Coordinate()) - a = PseudoAxis(s.matplotlib_scale) - a.set_view_interval(0, 1) + a = self.setup_ticks(x, every=d) assert np.allclose(np.diff(a.major.locator()), d) def test_tick_every_between(self, x): lo, hi = .2, .8 for d in [.05, .2, .5]: - s = Continuous().tick(every=d, between=(lo, hi)).setup(x, Coordinate()) - a = PseudoAxis(s.matplotlib_scale) - a.set_view_interval(0, 1) + a = self.setup_ticks(x, every=d, between=(lo, hi)) expected = np.arange(lo, hi + d, d) assert_array_equal(a.major.locator(), expected) def test_tick_at(self, x): locs = [.2, .5, .9] - s = Continuous().tick(at=locs).setup(x, Coordinate()) - a = PseudoAxis(s.matplotlib_scale) - a.set_view_interval(0, 1) + a = self.setup_ticks(x, at=locs) assert_array_equal(a.major.locator(), locs) def test_tick_count(self, x): n = 8 - s = Continuous().tick(count=n).setup(x, Coordinate()) - a = PseudoAxis(s.matplotlib_scale) - a.set_view_interval(0, 1) + a = self.setup_ticks(x, count=n) assert_array_equal(a.major.locator(), np.linspace(0, 1, n)) def test_tick_count_between(self, x): n = 5 lo, hi = .2, .7 - s = 
Continuous().tick(count=n, between=(lo, hi)).setup(x, Coordinate()) - a = PseudoAxis(s.matplotlib_scale) - a.set_view_interval(0, 1) + a = self.setup_ticks(x, count=n, between=(lo, hi)) assert_array_equal(a.major.locator(), np.linspace(lo, hi, n)) def test_tick_minor(self, x): n = 3 - s = Continuous().tick(count=2, minor=n).setup(x, Coordinate()) - a = PseudoAxis(s.matplotlib_scale) - a.set_view_interval(0, 1) + a = self.setup_ticks(x, count=2, minor=n) # I am not sure why matplotlib's minor ticks include the # largest major location but exclude the smalllest one ... expected = np.linspace(0, 1, n + 2)[1:] @@ -186,8 +190,8 @@ def test_tick_minor(self, x): def test_log_tick_default(self, x): - s = Continuous(transform="log").setup(x, Coordinate()) - a = PseudoAxis(s.matplotlib_scale) + s = Continuous(trans="log")._setup(x, Coordinate()) + a = PseudoAxis(s._matplotlib_scale) a.set_view_interval(.5, 1050) ticks = a.major.locator() assert np.allclose(np.diff(np.log10(ticks)), 1) @@ -195,24 +199,102 @@ def test_log_tick_default(self, x): def test_log_tick_upto(self, x): n = 3 - s = Continuous(transform="log").tick(upto=n).setup(x, Coordinate()) - a = PseudoAxis(s.matplotlib_scale) + s = Continuous(trans="log").tick(upto=n)._setup(x, Coordinate()) + a = PseudoAxis(s._matplotlib_scale) assert a.major.locator.numticks == n def test_log_tick_count(self, x): with pytest.raises(RuntimeError, match="`count` requires"): - Continuous(transform="log").tick(count=4) + Continuous(trans="log").tick(count=4) - s = Continuous(transform="log").tick(count=4, between=(1, 1000)) - a = PseudoAxis(s.setup(x, Coordinate()).matplotlib_scale) + s = Continuous(trans="log").tick(count=4, between=(1, 1000)) + a = PseudoAxis(s._setup(x, Coordinate())._matplotlib_scale) a.set_view_interval(.5, 1050) assert_array_equal(a.major.locator(), [1, 10, 100, 1000]) def test_log_tick_every(self, x): with pytest.raises(RuntimeError, match="`every` not supported"): - Continuous(transform="log").tick(every=2) + 
Continuous(trans="log").tick(every=2) + + def test_symlog_tick_default(self, x): + + s = Continuous(trans="symlog")._setup(x, Coordinate()) + a = PseudoAxis(s._matplotlib_scale) + a.set_view_interval(-1050, 1050) + ticks = a.major.locator() + assert ticks[0] == -ticks[-1] + pos_ticks = np.sort(np.unique(np.abs(ticks))) + assert np.allclose(np.diff(np.log10(pos_ticks[1:])), 1) + assert pos_ticks[0] == 0 + + def test_label_formatter(self, x): + + fmt = mpl.ticker.FormatStrFormatter("%.3f") + a, locs = self.setup_labels(x, fmt) + labels = a.major.formatter.format_ticks(locs) + for text in labels: + assert re.match(r"^\d\.\d{3}$", text) + + def test_label_like_pattern(self, x): + + a, locs = self.setup_labels(x, like=".4f") + labels = a.major.formatter.format_ticks(locs) + for text in labels: + assert re.match(r"^\d\.\d{4}$", text) + + def test_label_like_string(self, x): + + a, locs = self.setup_labels(x, like="x = {x:.1f}") + labels = a.major.formatter.format_ticks(locs) + for text in labels: + assert re.match(r"^x = \d\.\d$", text) + + def test_label_like_function(self, x): + + a, locs = self.setup_labels(x, like="{:^5.1f}".format) + labels = a.major.formatter.format_ticks(locs) + for text in labels: + assert re.match(r"^ \d\.\d $", text) + + def test_label_base(self, x): + + a, locs = self.setup_labels(100 * x, base=2) + labels = a.major.formatter.format_ticks(locs) + for text in labels[1:]: + assert not text or "2^" in text + + def test_label_unit(self, x): + + a, locs = self.setup_labels(1000 * x, unit="g") + labels = a.major.formatter.format_ticks(locs) + for text in labels[1:-1]: + assert re.match(r"^\d+ mg$", text) + + def test_label_unit_with_sep(self, x): + + a, locs = self.setup_labels(1000 * x, unit=("", "g")) + labels = a.major.formatter.format_ticks(locs) + for text in labels[1:-1]: + assert re.match(r"^\d+mg$", text) + + def test_label_base_from_transform(self, x): + + s = Continuous(trans="log") + a = PseudoAxis(s._setup(x, 
Coordinate())._matplotlib_scale) + a.set_view_interval(10, 1000) + label, = a.major.formatter.format_ticks([100]) + assert r"10^{2}" in label + + def test_label_type_checks(self): + + s = Continuous() + with pytest.raises(TypeError, match="Label formatter must be"): + s.label("{x}") + + with pytest.raises(TypeError, match="`like` must be"): + s.label(like=2) class TestNominal: @@ -227,26 +309,23 @@ def y(self): def test_coordinate_defaults(self, x): - s = Nominal().setup(x, Coordinate()) + s = Nominal()._setup(x, Coordinate()) assert_array_equal(s(x), np.array([0, 1, 2, 1], float)) - assert_array_equal(s.invert_axis_transform(s(x)), s(x)) def test_coordinate_with_order(self, x): - s = Nominal(order=["a", "b", "c"]).setup(x, Coordinate()) + s = Nominal(order=["a", "b", "c"])._setup(x, Coordinate()) assert_array_equal(s(x), np.array([0, 2, 1, 2], float)) - assert_array_equal(s.invert_axis_transform(s(x)), s(x)) def test_coordinate_with_subset_order(self, x): - s = Nominal(order=["c", "a"]).setup(x, Coordinate()) + s = Nominal(order=["c", "a"])._setup(x, Coordinate()) assert_array_equal(s(x), np.array([1, 0, np.nan, 0], float)) - assert_array_equal(s.invert_axis_transform(s(x)), s(x)) def test_coordinate_axis(self, x): ax = mpl.figure.Figure().subplots() - s = Nominal().setup(x, Coordinate(), ax.xaxis) + s = Nominal()._setup(x, Coordinate(), ax.xaxis) assert_array_equal(s(x), np.array([0, 1, 2, 1], float)) f = ax.xaxis.get_major_formatter() assert f.format_ticks([0, 1, 2]) == ["a", "c", "b"] @@ -255,7 +334,7 @@ def test_coordinate_axis_with_order(self, x): order = ["a", "b", "c"] ax = mpl.figure.Figure().subplots() - s = Nominal(order=order).setup(x, Coordinate(), ax.xaxis) + s = Nominal(order=order)._setup(x, Coordinate(), ax.xaxis) assert_array_equal(s(x), np.array([0, 2, 1, 2], float)) f = ax.xaxis.get_major_formatter() assert f.format_ticks([0, 1, 2]) == order @@ -264,7 +343,7 @@ def test_coordinate_axis_with_subset_order(self, x): order = ["c", "a"] ax = 
mpl.figure.Figure().subplots() - s = Nominal(order=order).setup(x, Coordinate(), ax.xaxis) + s = Nominal(order=order)._setup(x, Coordinate(), ax.xaxis) assert_array_equal(s(x), np.array([1, 0, np.nan, 0], float)) f = ax.xaxis.get_major_formatter() assert f.format_ticks([0, 1, 2]) == [*order, ""] @@ -274,7 +353,7 @@ def test_coordinate_axis_with_category_dtype(self, x): order = ["b", "a", "d", "c"] x = x.astype(pd.CategoricalDtype(order)) ax = mpl.figure.Figure().subplots() - s = Nominal().setup(x, Coordinate(), ax.xaxis) + s = Nominal()._setup(x, Coordinate(), ax.xaxis) assert_array_equal(s(x), np.array([1, 3, 0, 3], float)) f = ax.xaxis.get_major_formatter() assert f.format_ticks([0, 1, 2, 3]) == order @@ -282,7 +361,7 @@ def test_coordinate_axis_with_category_dtype(self, x): def test_coordinate_numeric_data(self, y): ax = mpl.figure.Figure().subplots() - s = Nominal().setup(y, Coordinate(), ax.yaxis) + s = Nominal()._setup(y, Coordinate(), ax.yaxis) assert_array_equal(s(y), np.array([1, 0, 2, 0], float)) f = ax.yaxis.get_major_formatter() assert f.format_ticks([0, 1, 2]) == ["-1.5", "1.0", "3.0"] @@ -291,46 +370,46 @@ def test_coordinate_numeric_data_with_order(self, y): order = [1, 4, -1.5] ax = mpl.figure.Figure().subplots() - s = Nominal(order=order).setup(y, Coordinate(), ax.yaxis) + s = Nominal(order=order)._setup(y, Coordinate(), ax.yaxis) assert_array_equal(s(y), np.array([0, 2, np.nan, 2], float)) f = ax.yaxis.get_major_formatter() assert f.format_ticks([0, 1, 2]) == ["1.0", "4.0", "-1.5"] def test_color_defaults(self, x): - s = Nominal().setup(x, Color()) + s = Nominal()._setup(x, Color()) cs = color_palette() assert_array_equal(s(x), [cs[0], cs[1], cs[2], cs[1]]) def test_color_named_palette(self, x): pal = "flare" - s = Nominal(pal).setup(x, Color()) + s = Nominal(pal)._setup(x, Color()) cs = color_palette(pal, 3) assert_array_equal(s(x), [cs[0], cs[1], cs[2], cs[1]]) def test_color_list_palette(self, x): cs = color_palette("crest", 3) - s = 
Nominal(cs).setup(x, Color()) + s = Nominal(cs)._setup(x, Color()) assert_array_equal(s(x), [cs[0], cs[1], cs[2], cs[1]]) def test_color_dict_palette(self, x): cs = color_palette("crest", 3) pal = dict(zip("bac", cs)) - s = Nominal(pal).setup(x, Color()) + s = Nominal(pal)._setup(x, Color()) assert_array_equal(s(x), [cs[1], cs[2], cs[0], cs[2]]) def test_color_numeric_data(self, y): - s = Nominal().setup(y, Color()) + s = Nominal()._setup(y, Color()) cs = color_palette() assert_array_equal(s(y), [cs[1], cs[0], cs[2], cs[0]]) def test_color_numeric_with_order_subset(self, y): - s = Nominal(order=[-1.5, 1]).setup(y, Color()) + s = Nominal(order=[-1.5, 1])._setup(y, Color()) c1, c2 = color_palette(n_colors=2) null = (np.nan, np.nan, np.nan) assert_array_equal(s(y), [c2, c1, null, c1]) @@ -339,7 +418,7 @@ def test_color_numeric_with_order_subset(self, y): def test_color_numeric_int_float_mix(self): z = pd.Series([1, 2], name="z") - s = Nominal(order=[1.0, 2]).setup(z, Color()) + s = Nominal(order=[1.0, 2])._setup(z, Color()) c1, c2 = color_palette(n_colors=2) null = (np.nan, np.nan, np.nan) assert_array_equal(s(z), [c1, null, c2]) @@ -347,7 +426,7 @@ def test_color_numeric_int_float_mix(self): def test_color_alpha_in_palette(self, x): cs = [(.2, .2, .3, .5), (.1, .2, .3, 1), (.5, .6, .2, 0)] - s = Nominal(cs).setup(x, Color()) + s = Nominal(cs)._setup(x, Color()) assert_array_equal(s(x), [cs[0], cs[1], cs[2], cs[1]]) def test_color_unknown_palette(self, x): @@ -355,7 +434,7 @@ def test_color_unknown_palette(self, x): pal = "not_a_palette" err = f"{pal} is not a valid palette name" with pytest.raises(ValueError, match=err): - Nominal(pal).setup(x, Color()) + Nominal(pal)._setup(x, Color()) def test_object_defaults(self, x): @@ -363,62 +442,62 @@ class MockProperty(ObjectProperty): def _default_values(self, n): return list("xyz"[:n]) - s = Nominal().setup(x, MockProperty()) + s = Nominal()._setup(x, MockProperty()) assert s(x) == ["x", "y", "z", "y"] def 
test_object_list(self, x): vs = ["x", "y", "z"] - s = Nominal(vs).setup(x, ObjectProperty()) + s = Nominal(vs)._setup(x, ObjectProperty()) assert s(x) == ["x", "y", "z", "y"] def test_object_dict(self, x): vs = {"a": "x", "b": "y", "c": "z"} - s = Nominal(vs).setup(x, ObjectProperty()) + s = Nominal(vs)._setup(x, ObjectProperty()) assert s(x) == ["x", "z", "y", "z"] def test_object_order(self, x): vs = ["x", "y", "z"] - s = Nominal(vs, order=["c", "a", "b"]).setup(x, ObjectProperty()) + s = Nominal(vs, order=["c", "a", "b"])._setup(x, ObjectProperty()) assert s(x) == ["y", "x", "z", "x"] def test_object_order_subset(self, x): vs = ["x", "y"] - s = Nominal(vs, order=["a", "c"]).setup(x, ObjectProperty()) + s = Nominal(vs, order=["a", "c"])._setup(x, ObjectProperty()) assert s(x) == ["x", "y", None, "y"] def test_objects_that_are_weird(self, x): vs = [("x", 1), (None, None, 0), {}] - s = Nominal(vs).setup(x, ObjectProperty()) + s = Nominal(vs)._setup(x, ObjectProperty()) assert s(x) == [vs[0], vs[1], vs[2], vs[1]] def test_alpha_default(self, x): - s = Nominal().setup(x, Alpha()) + s = Nominal()._setup(x, Alpha()) assert_array_equal(s(x), [.95, .625, .3, .625]) def test_fill(self): x = pd.Series(["a", "a", "b", "a"], name="x") - s = Nominal().setup(x, Fill()) + s = Nominal()._setup(x, Fill()) assert_array_equal(s(x), [True, True, False, True]) def test_fill_dict(self): x = pd.Series(["a", "a", "b", "a"], name="x") vs = {"a": False, "b": True} - s = Nominal(vs).setup(x, Fill()) + s = Nominal(vs)._setup(x, Fill()) assert_array_equal(s(x), [False, False, True, False]) def test_fill_nunique_warning(self): x = pd.Series(["a", "b", "c", "a", "b"], name="x") with pytest.warns(UserWarning, match="The variable assigned to fill"): - s = Nominal().setup(x, Fill()) + s = Nominal()._setup(x, Fill()) assert_array_equal(s(x), [True, False, True, True, False]) def test_interval_defaults(self, x): @@ -426,29 +505,29 @@ def test_interval_defaults(self, x): class 
MockProperty(IntervalProperty): _default_range = (1, 2) - s = Nominal().setup(x, MockProperty()) + s = Nominal()._setup(x, MockProperty()) assert_array_equal(s(x), [2, 1.5, 1, 1.5]) def test_interval_tuple(self, x): - s = Nominal((1, 2)).setup(x, IntervalProperty()) + s = Nominal((1, 2))._setup(x, IntervalProperty()) assert_array_equal(s(x), [2, 1.5, 1, 1.5]) def test_interval_tuple_numeric(self, y): - s = Nominal((1, 2)).setup(y, IntervalProperty()) + s = Nominal((1, 2))._setup(y, IntervalProperty()) assert_array_equal(s(y), [1.5, 2, 1, 2]) def test_interval_list(self, x): vs = [2, 5, 4] - s = Nominal(vs).setup(x, IntervalProperty()) + s = Nominal(vs)._setup(x, IntervalProperty()) assert_array_equal(s(x), [2, 5, 4, 5]) def test_interval_dict(self, x): vs = {"a": 3, "b": 4, "c": 6} - s = Nominal(vs).setup(x, IntervalProperty()) + s = Nominal(vs)._setup(x, IntervalProperty()) assert_array_equal(s(x), [3, 6, 4, 6]) def test_interval_with_transform(self, x): @@ -457,7 +536,7 @@ class MockProperty(IntervalProperty): _forward = np.square _inverse = np.sqrt - s = Nominal((2, 4)).setup(x, MockProperty()) + s = Nominal((2, 4))._setup(x, MockProperty()) assert_array_equal(s(x), [4, np.sqrt(10), 2, np.sqrt(10)]) @@ -474,19 +553,19 @@ def x(self, t): def test_coordinate_defaults(self, t, x): - s = Temporal().setup(t, Coordinate()) + s = Temporal()._setup(t, Coordinate()) assert_array_equal(s(t), x) def test_interval_defaults(self, t, x): - s = Temporal().setup(t, IntervalProperty()) + s = Temporal()._setup(t, IntervalProperty()) normed = (x - x.min()) / (x.max() - x.min()) assert_array_equal(s(t), normed) def test_interval_with_range(self, t, x): values = (1, 3) - s = Temporal((1, 3)).setup(t, IntervalProperty()) + s = Temporal((1, 3))._setup(t, IntervalProperty()) normed = (x - x.min()) / (x.max() - x.min()) expected = normed * (values[1] - values[0]) + values[0] assert_array_equal(s(t), expected) @@ -494,7 +573,7 @@ def test_interval_with_range(self, t, x): def 
test_interval_with_norm(self, t, x): norm = t[1], t[2] - s = Temporal(norm=norm).setup(t, IntervalProperty()) + s = Temporal(norm=norm)._setup(t, IntervalProperty()) n = mpl.dates.date2num(norm) normed = (x - n[0]) / (n[1] - n[0]) assert_array_equal(s(t), normed) @@ -502,7 +581,7 @@ def test_interval_with_norm(self, t, x): def test_color_defaults(self, t, x): cmap = color_palette("ch:", as_cmap=True) - s = Temporal().setup(t, Color()) + s = Temporal()._setup(t, Color()) normed = (x - x.min()) / (x.max() - x.min()) assert_array_equal(s(t), cmap(normed)[:, :3]) # FIXME RGBA @@ -510,31 +589,56 @@ def test_color_named_values(self, t, x): name = "viridis" cmap = color_palette(name, as_cmap=True) - s = Temporal(name).setup(t, Color()) + s = Temporal(name)._setup(t, Color()) normed = (x - x.min()) / (x.max() - x.min()) assert_array_equal(s(t), cmap(normed)[:, :3]) # FIXME RGBA def test_coordinate_axis(self, t, x): ax = mpl.figure.Figure().subplots() - s = Temporal().setup(t, Coordinate(), ax.xaxis) + s = Temporal()._setup(t, Coordinate(), ax.xaxis) assert_array_equal(s(t), x) locator = ax.xaxis.get_major_locator() formatter = ax.xaxis.get_major_formatter() assert isinstance(locator, mpl.dates.AutoDateLocator) assert isinstance(formatter, mpl.dates.AutoDateFormatter) - def test_concise_format(self, t, x): + @pytest.mark.skipif( + Version(mpl.__version__) < Version("3.3.0"), + reason="Test requires new matplotlib date epoch." 
+ ) + def test_tick_locator(self, t): - ax = mpl.figure.Figure().subplots() - Temporal().format(concise=True).setup(t, Coordinate(), ax.xaxis) - formatter = ax.xaxis.get_major_formatter() - assert isinstance(formatter, mpl.dates.ConciseDateFormatter) + locator = mpl.dates.YearLocator(month=3, day=15) + s = Temporal().tick(locator) + a = PseudoAxis(s._setup(t, Coordinate())._matplotlib_scale) + a.set_view_interval(0, 365) + assert 73 in a.major.locator() def test_tick_upto(self, t, x): n = 8 ax = mpl.figure.Figure().subplots() - Temporal().tick(upto=n).setup(t, Coordinate(), ax.xaxis) + Temporal().tick(upto=n)._setup(t, Coordinate(), ax.xaxis) locator = ax.xaxis.get_major_locator() assert set(locator.maxticks.values()) == {n} + + @pytest.mark.skipif( + Version(mpl.__version__) < Version("3.3.0"), + reason="Test requires new matplotlib date epoch." + ) + def test_label_formatter(self, t): + + formatter = mpl.dates.DateFormatter("%Y") + s = Temporal().label(formatter) + a = PseudoAxis(s._setup(t, Coordinate())._matplotlib_scale) + a.set_view_interval(10, 1000) + label, = a.major.formatter.format_ticks([100]) + assert label == "1970" + + def test_label_concise(self, t, x): + + ax = mpl.figure.Figure().subplots() + Temporal().label(concise=True)._setup(t, Coordinate(), ax.xaxis) + formatter = ax.xaxis.get_major_formatter() + assert isinstance(formatter, mpl.dates.ConciseDateFormatter) diff --git a/tests/test_categorical.py b/tests/test_categorical.py index 686fb7bfc4..56b0dbf65f 100644 --- a/tests/test_categorical.py +++ b/tests/test_categorical.py @@ -1952,7 +1952,7 @@ def test_hue_dodged(self, long_df, hue_var): @pytest.mark.parametrize( "val_var,val_col,hue_col", - itertools.product(["x", "y"], ["b", "y", "t"], [None, "a"]), + list(itertools.product(["x", "y"], ["b", "y", "t"], [None, "a"])), ) def test_single(self, long_df, val_var, val_col, hue_col):