Skip to content

Commit

Permalink
Drop missing data centrally while allowing marks to disable (#2816)
Browse files Browse the repository at this point in the history
  • Loading branch information
mwaskom authored May 22, 2022
1 parent 34b8873 commit 995ea1c
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 6 deletions.
6 changes: 5 additions & 1 deletion seaborn/_core/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -1263,12 +1263,16 @@ def _setup_split_generator(
order = categorical_order(df[var])
grouping_keys.append(order)

def split_generator() -> Generator:
def split_generator(dropna=True) -> Generator:

for view in subplots:

axes_df = self._filter_subplot_data(df, view)

if dropna:
with pd.option_context("mode.use_inf_as_null", True):
axes_df = axes_df.dropna()

subplot_keys = {}
for dim in ["col", "row"]:
if view[dim] is not None:
Expand Down
7 changes: 6 additions & 1 deletion seaborn/_core/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ class ObjectProperty(Property):
normed = False

# Object representing null data, should appear invisible when drawn by matplotlib
# Note that we now drop nulls in Plot._plot_layer and thus may not need this
null_value: Any = None

def _default_values(self, n: int) -> list:
Expand Down Expand Up @@ -720,7 +721,11 @@ def get_mapping(
raise TypeError(msg)

def mapping(x):
return np.take(values, np.asarray(x, np.intp))
ixs = np.asarray(x, np.intp)
return [
values[ix] if np.isfinite(x_i) else False
for x_i, ix in zip(x, ixs)
]

return mapping

Expand Down
7 changes: 5 additions & 2 deletions seaborn/_marks/basic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations
from dataclasses import dataclass

import numpy as np
import matplotlib as mpl

from seaborn._marks.base import (
Expand Down Expand Up @@ -35,13 +36,15 @@ class Line(Mark):

def _plot(self, split_gen, scales, orient):

for keys, data, ax in split_gen():
for keys, data, ax in split_gen(dropna=False):

keys = resolve_properties(self, keys, scales)

if self.sort:
# TODO where to dropna?
data = data.dropna().sort_values(orient)
data = data.sort_values(orient)
else:
data.loc[data.isna().any(axis=1), ["x", "y"]] = np.nan

line = mpl.lines.Line2D(
data["x"].to_numpy(),
Expand Down
3 changes: 1 addition & 2 deletions seaborn/_marks/scatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def _resolve_properties(self, data, scales):
filled_marker = [m.is_filled() for m in resolved["marker"]]

resolved["linewidth"] = resolved["stroke"]
resolved["fill"] = resolved["fill"] & filled_marker
resolved["fill"] = resolved["fill"] * filled_marker
resolved["size"] = resolved["pointsize"] ** 2

resolved["edgecolor"] = resolve_color(self, data, "", scales)
Expand All @@ -91,7 +91,6 @@ def _plot(self, split_gen, scales, orient):
# (That should be solved upstream by defaulting to "" for unset x/y?)
# (Be mindful of xmin/xmax, etc!)

# TODO pass scales *into* split_gen?
for keys, data, ax in split_gen():

offsets = np.column_stack([data["x"], data["y"]])
Expand Down
13 changes: 13 additions & 0 deletions seaborn/tests/_marks/test_scatter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from matplotlib.colors import to_rgba, to_rgba_array

import pytest
from numpy.testing import assert_array_equal

from seaborn._core.plot import Plot
Expand Down Expand Up @@ -139,3 +140,15 @@ def test_filled_unfilled_mix(self):

expected = [mark.edgewidth, mark.stroke]
assert_array_equal(points.get_linewidths(), expected)

@pytest.mark.parametrize("prop", ["color", "fill", "marker", "pointsize"])
def test_missing_semantic_data(self, prop):
    """Plot three dots whose middle entry has NaN for ``prop``.

    The expected offsets handed to ``check_offsets`` contain only the
    first and third observations.
    """
    xs = [1, 2, 3]
    ys = [5, 3, 4]
    # The parametrized semantic variable carries one missing value.
    semantic = {prop: ["a", float("nan"), "b"]}

    plot = Plot(x=xs, y=ys, **semantic).add(Dot()).plot()
    axes = plot._figure.axes[0]

    # Single-element unpacking: raises unless there is exactly one collection.
    (dots,) = axes.collections
    self.check_offsets(dots, [1, 3], [5, 4])

0 comments on commit 995ea1c

Please sign in to comment.