Skip to content

Commit

Permalink
Address some TODOs in data module
Browse files Browse the repository at this point in the history
  • Loading branch information
mwaskom committed Jun 19, 2021
1 parent 96e8d25 commit 11ba980
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 23 deletions.
41 changes: 21 additions & 20 deletions seaborn/_core/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,11 @@ def _assign_variables(
names: dict[str, str | None]

plot_data = {}
var_names = {}
names = {}

# Data is optional; all variables can be defined as vectors
if data is None:
data_not_passed = data is None
if data_not_passed:
data = {}

# TODO Generally interested in accepting a generic DataFrame interface
Expand Down Expand Up @@ -172,23 +173,28 @@ def _assign_variables(
plot_data[key] = data[val]
elif val in index:
plot_data[key] = index[val]
var_names[key] = str(val)
names[key] = str(val)

elif isinstance(val, str):

# This looks like a column name but we don't know what it means!
# TODO improve this feedback to distinguish between
# - "you passed a string, but did not pass data"
# - "you passed a string, it was not found in data"
# This looks like a column name but, lookup failed.

err = f"Could not interpret value `{val}` for parameter `{key}`"
err = f"Could not interpret value `{val}` for `{key}`. "
if data_not_passed:
err += "Value is a string, but `data` was not passed."
else:
err += "An entry with this name does not appear in `data`."
raise ValueError(err)

else:

# Otherwise, assume the value is itself data
# Otherwise, assume the value somehow represents data

# Ignore empty data structures
if isinstance(val, abc.Sized) and len(val) == 0:
continue

# Raise when data object is present and a vector can't matched
# If vector has no index, it must match length of data table
if isinstance(data, pd.DataFrame) and not isinstance(val, pd.Series):
if isinstance(val, abc.Sized) and len(data) != len(val):
val_cls = val.__class__.__name__
Expand All @@ -201,19 +207,14 @@ def _assign_variables(

plot_data[key] = val

# Try to infer the name of the variable
var_names[key] = getattr(val, "name", None)
# Try to infer the original name using pandas-like metadata
if hasattr(val, "name"):
names[key] = str(val.name)
else:
names[key] = None

# Construct a tidy plot DataFrame. This will convert a number of
# types automatically, aligning on index in case of pandas objects
frame = pd.DataFrame(plot_data)

# Build the mapping from plot variables to original names (if available)
names = {
var: name
for var, name in var_names.items()
# TODO I am not sure that this is necessary any more
if frame[var].notnull().any()
}

return frame, names
6 changes: 3 additions & 3 deletions seaborn/tests/_core/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,17 +190,17 @@ def test_key_not_in_data_raises(self, long_df):

var = "x"
key = "what"
msg = f"Could not interpret value `{key}` for parameter `{var}`"
msg = f"Could not interpret value `{key}` for `{var}`. An entry with this name"
with pytest.raises(ValueError, match=msg):
PlotData(long_df, {var: key})

def test_key_with_no_data_raises(self):

var = "x"
key = "what"
msg = f"Could not interpret value `{key}` for parameter `{var}`"
msg = f"Could not interpret value `{key}` for `{var}`. Value is a string,"
with pytest.raises(ValueError, match=msg):
PlotData(variables={var: key})
PlotData(None, {var: key})

def test_data_vector_different_lengths_raises(self, long_df):

Expand Down

0 comments on commit 11ba980

Please sign in to comment.